diff --git a/qai_hub_models/_version.py b/qai_hub_models/_version.py
index 5afe102e..49a2e97c 100644
--- a/qai_hub_models/_version.py
+++ b/qai_hub_models/_version.py
@@ -2,4 +2,4 @@
 # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
 # SPDX-License-Identifier: BSD-3-Clause
 # ---------------------------------------------------------------------
-__version__ = "0.7.0"
+__version__ = "0.8.0"
diff --git a/qai_hub_models/asset_bases.yaml b/qai_hub_models/asset_bases.yaml
index 124db058..24e36b5f 100644
--- a/qai_hub_models/asset_bases.yaml
+++ b/qai_hub_models/asset_bases.yaml
@@ -6,6 +6,7 @@ model_asset_folder: models/{model_id}/v{version}
 dataset_asset_folder: datasets/{dataset_id}/v{version}
 repo_url: https://github.com/quic/ai-hub-models/blob/main
 qaihm_repo: qai_hub_models/models/{model_id}
+labels_path: qai_hub_models/labels/{labels_file}
 example_use: qai_hub_models/models/{model_id}#example--usage
 huggingface_path: qualcomm/{model_name}
 models_website_url: https://aihub.qualcomm.com
diff --git a/qai_hub_models/evaluators/classification_evaluator.py b/qai_hub_models/evaluators/classification_evaluator.py
index 2c00a4e3..c0f767c7 100644
--- a/qai_hub_models/evaluators/classification_evaluator.py
+++ b/qai_hub_models/evaluators/classification_evaluator.py
@@ -19,20 +19,32 @@ def __init__(self, num_classes: int = 1000):
     def add_batch(self, output: torch.Tensor, gt: int | torch.Tensor):
         # This evaluator supports only 1 output tensor at a time.
         assert len(output.shape) == 2 and output.shape[-1] == self.num_classes
-        gt_tensor = torch.Tensor(gt)
-        assert len(gt_tensor.shape) == 1 and gt_tensor.shape[0] == output.shape[0]
+        gt_tensor = torch.Tensor(gt).unsqueeze(1)
+        assert len(gt_tensor.shape) == 2 and gt_tensor.shape[0] == output.shape[0]
         batch_size = output.shape[0]
         self.total_samples += batch_size
-        self.num_correct += sum(torch.argmax(output, dim=-1) == gt_tensor)
+
+        top5 = torch.topk(output, 5).indices
+        self.top5_count += torch.sum(top5 == gt_tensor).item()
+        self.top1_count += torch.sum(top5[:, :1] == gt_tensor).item()

     def reset(self):
-        self.num_correct = 0
+        self.top1_count = 0
+        self.top5_count = 0
         self.total_samples = 0

-    def get_accuracy_score(self) -> float:
+    def top1(self) -> float:
         if self.total_samples == 0:
             return 0
-        return self.num_correct / self.total_samples
+        return self.top1_count / self.total_samples
+
+    def top5(self) -> float:
+        if self.total_samples == 0:
+            return 0
+        return self.top5_count / self.total_samples
+
+    def get_accuracy_score(self) -> float:
+        return self.top1()

     def formatted_accuracy(self) -> str:
-        return f"{self.get_accuracy_score() * 100:.1f}%"
+        return f"{self.top1() * 100:.1f}% (Top 1), {self.top5() * 100:.1f}% (Top 5)"
diff --git a/qai_hub_models/global_requirements.txt b/qai_hub_models/global_requirements.txt
index a9b38438..af284942 100644
--- a/qai_hub_models/global_requirements.txt
+++ b/qai_hub_models/global_requirements.txt
@@ -3,34 +3,45 @@
 # - Then install this requirements file
 # That should create an environment that works for every single model.

+# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY.
+ Deprecated==1.2.11 PySoundFile; sys_platform == 'win32' aimet-torch==1.31.2; sys_platform == "linux" albumentations==0.5.2 -av==10.0.0 basicsr==1.4.2 +boto3==1.34.119 +botocore==1.34.119 click==8.1.7 +coverage==5.3.1 data-gradients==0.3.1 datasets==2.14.5 diffusers[torch]==0.21.4 easydict==1.10 einops==0.3.2 -ffmpeg==1.4 ftfy==6.1.1 hydra-core==1.3.0 imageio[ffmpeg]==2.31.5 imagesize==1.4.1 +jinja2==3.0.3 +keyrings.envvars==1.1.0; python_version >= '3.9' # used only by CI kornia==0.5.0 librosa==0.10.1 mmcv==2.1.0 mmdet==3.2.0 mmpose==1.2.0 +mypy==0.991 object-detection-metrics==0.4.post1 openai-whisper==20230314 +pre-commit==3.5.0 pycocotools==2.0.7 +pytest-cov==4.1.0 +pytest-xdist==3.3.1 pytorch-lightning==1.6.0 rapidfuzz==3.8.1 -regex==2023.12.25 +regex==2023.10.3 +ruamel-yaml==0.18.6 +schema==0.7.5 scikit-image==0.21.0 scikit-learn==1.1.3 scipy==1.8.1 @@ -39,14 +50,18 @@ sentencepiece==0.2.0 shapely==2.0.3 soundfile==0.12.1 stringcase==1.2.0 +tensorboard==2.13.0 tflite==2.10.0 thop==0.1.1.post2209072238 -timm==0.9.11 -tensorboard==2.13.0 +timm==1.0.3 torchaudio==0.13.1 +torchmetrics==1.4.0.post0 transformers==4.41.1 treelib==1.6.1 -tucker-conv==1.0.1 +types-PyYAML==6.0.12.12 +types-pillow==10.2.0.20240213 +types-requests==2.31.0.6 +types-tabulate==0.9.0.20240106 ultralytics==8.0.193 webdataset==0.2.86 yacs==0.1.8 diff --git a/qai_hub_models/labels/cityscapes_labels.txt b/qai_hub_models/labels/cityscapes_labels.txt new file mode 100644 index 00000000..7bd4a33c --- /dev/null +++ b/qai_hub_models/labels/cityscapes_labels.txt @@ -0,0 +1,19 @@ +road +sidewalk +building +wall +fence +pole +traffic light +traffic sign +vegetation +terrain +sky +person +rider +car +truck +bus +train +motorcycle +bicycle diff --git a/qai_hub_models/labels/coco_labels.txt b/qai_hub_models/labels/coco_labels.txt new file mode 100644 index 00000000..941cb4e1 --- /dev/null +++ b/qai_hub_models/labels/coco_labels.txt @@ -0,0 +1,80 @@ +person +bicycle +car +motorcycle +airplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +couch +potted plant +bed +dining table +toilet +tv +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush diff --git a/qai_hub_models/labels/imagenet_labels.txt b/qai_hub_models/labels/imagenet_labels.txt new file mode 100644 index 00000000..f40829ed --- /dev/null +++ b/qai_hub_models/labels/imagenet_labels.txt @@ -0,0 +1,1000 @@ +tench +goldfish +great white shark +tiger shark +hammerhead +electric ray +stingray +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +loggerhead +leatherback turtle +mud turtle +terrapin +box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +African crocodile +American alligator +triceratops +thunder snake +ringneck snake +hognose 
snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow +tarantula +wolf spider +tick +centipede +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser +goose +black swan +tusker +echidna +platypus +wallaby +koala +wombat +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +chambered nautilus +Dungeness crab +rock crab +fiddler crab +king crab +American lobster +spiny lobster +crayfish +hermit crab +isopod +white stork +black stork +spoonbill +flamingo +little blue heron +American egret +bittern +crane +limpkin +European gallinule +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +pelican +king penguin +albatross +grey whale +killer whale +dugong +sea lion +Chihuahua +Japanese spaniel +Maltese dog +Pekinese +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound +basset +beagle +bloodhound +bluetick +black-and-tan coonhound +Walker hound +English foxhound +redbone +borzoi +Irish wolfhound +Italian greyhound +whippet +Ibizan hound +Norwegian elkhound +otterhound +Saluki +Scottish deerhound +Weimaraner +Staffordshire bullterrier +American Staffordshire terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier +Airedale +cairn +Australian terrier +Dandie Dinmont +Boston bull +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier +Tibetan terrier +silky terrier +soft-coated wheaten terrier +West Highland white terrier +Lhasa +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla +English setter +Irish setter +Gordon setter +Brittany spaniel +clumber +English springer +Welsh springer spaniel +cocker spaniel +Sussex spaniel +Irish water spaniel +kuvasz +schipperke +groenendael +malinois +briard +kelpie +komondor +Old English sheepdog +Shetland sheepdog +collie +Border collie +Bouvier des Flandres +Rottweiler +German shepherd +Doberman +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard +Eskimo dog +malamute +Siberian husky +dalmatian +affenpinscher +basenji +pug +Leonberg +Newfoundland +Great Pyrenees +Samoyed +Pomeranian +chow +keeshond +Brabancon griffon +Pembroke +Cardigan +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf +white wolf +red wolf +coyote +dingo +dhole +African hunting dog +hyena +red fox +kit fox +Arctic fox +grey fox +tabby +tiger cat +Persian cat +Siamese cat +Egyptian cat +cougar +lynx +leopard +snow leopard +jaguar +lion +tiger +cheetah +brown bear +American black bear +ice bear +sloth bear +mongoose +meerkat +tiger beetle +ladybug +ground beetle +long-horned beetle +leaf beetle +dung beetle +rhinoceros beetle +weevil +fly +bee +ant +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet 
+monarch +cabbage butterfly +sulphur butterfly +lycaenid +starfish +sea urchin +sea cucumber +wood rabbit +hare +Angora +hamster +porcupine +fox squirrel +marmot +beaver +guinea pig +sorrel +zebra +hog +wild boar +warthog +hippopotamus +ox +water buffalo +bison +ram +bighorn +ibex +hartebeest +impala +gazelle +Arabian camel +llama +weasel +mink +polecat +black-footed ferret +otter +skunk +badger +armadillo +three-toed sloth +orangutan +gorilla +chimpanzee +gibbon +siamang +guenon +patas +baboon +macaque +langur +colobus +proboscis monkey +marmoset +capuchin +howler monkey +titi +spider monkey +squirrel monkey +Madagascar cat +indri +Indian elephant +African elephant +lesser panda +giant panda +barracouta +eel +coho +rock beauty +anemone fish +sturgeon +gar +lionfish +puffer +abacus +abaya +academic gown +accordion +acoustic guitar +aircraft carrier +airliner +airship +altar +ambulance +amphibian +analog clock +apiary +apron +ashcan +assault rifle +backpack +bakery +balance beam +balloon +ballpoint +Band Aid +banjo +bannister +barbell +barber chair +barbershop +barn +barometer +barrel +barrow +baseball +basketball +bassinet +bassoon +bathing cap +bath towel +bathtub +beach wagon +beacon +beaker +bearskin +beer bottle +beer glass +bell cote +bib +bicycle-built-for-two +bikini +binder +binoculars +birdhouse +boathouse +bobsled +bolo tie +bonnet +bookcase +bookshop +bottlecap +bow +bow tie +brass +brassiere +breakwater +breastplate +broom +bucket +buckle +bulletproof vest +bullet train +butcher shop +cab +caldron +candle +cannon +canoe +can opener +cardigan +car mirror +carousel +carpenter's kit +carton +car wheel +cash machine +cassette +cassette player +castle +catamaran +CD player +cello +cellular telephone +chain +chainlink fence +chain mail +chain saw +chest +chiffonier +chime +china cabinet +Christmas stocking +church +cinema +cleaver +cliff dwelling +cloak +clog +cocktail shaker +coffee mug +coffeepot +coil +combination lock +computer keyboard +confectionery +container ship +convertible +corkscrew +cornet +cowboy boot +cowboy hat +cradle +crane +crash helmet +crate +crib +Crock Pot +croquet ball +crutch +cuirass +dam +desk +desktop computer +dial telephone +diaper +digital clock +digital watch +dining table +dishrag +dishwasher +disk brake +dock +dogsled +dome +doormat +drilling platform +drum +drumstick +dumbbell +Dutch oven +electric fan +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa +file +fireboat +fire engine +fire screen +flagpole +flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn +frying pan +fur coat +garbage truck +gasmask +gas pump +goblet +go-kart +golf ball +golfcart +gondola +gong +gown +grand piano +greenhouse +grille +grocery store +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower +hand-held computer +handkerchief +hard disc +harmonica +harp +harvester +hatchet +holster +home theater +honeycomb +hook +hoopskirt +horizontal bar +horse cart +hourglass +iPod +iron +jack-o'-lantern +jean +jeep +jersey +jigsaw puzzle +jinrikisha +joystick +kimono +knee pad +knot +lab coat +ladle +lampshade +laptop +lawn mower +lens cap +letter opener +library +lifeboat +lighter +limousine +liner +lipstick +Loafer +lotion +loudspeaker +loupe +lumbermill +magnetic compass +mailbag +mailbox +maillot +maillot +manhole cover +maraca +marimba +mask +matchstick +maypole +maze +measuring cup +medicine chest +megalith +microphone +microwave +military 
uniform +milk can +minibus +miniskirt +minivan +missile +mitten +mixing bowl +mobile home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter +mountain bike +mountain tent +mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook +obelisk +oboe +ocarina +odometer +oil filter +organ +oscilloscope +overskirt +oxcart +oxygen mask +packet +paddle +paddlewheel +padlock +paintbrush +pajama +palace +panpipe +paper towel +parachute +parallel bars +park bench +parking meter +passenger car +patio +pay-phone +pedestal +pencil box +pencil sharpener +perfume +Petri dish +photocopier +pick +pickelhaube +picket fence +pickup +pier +piggy bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate +pitcher +plane +planetarium +plastic bag +plate rack +plow +plunger +Polaroid camera +pole +police van +poncho +pool table +pop bottle +pot +potter's wheel +power drill +prayer rug +printer +prison +projectile +projector +puck +punching bag +purse +quill +quilt +racer +racket +radiator +radio +radio telescope +rain barrel +recreational vehicle +reel +reflex camera +refrigerator +remote control +restaurant +revolver +rifle +rocking chair +rotisserie +rubber eraser +rugby ball +rule +running shoe +safe +safety pin +saltshaker +sandal +sarong +sax +scabbard +scale +school bus +schooner +scoreboard +screen +screw +screwdriver +seat belt +sewing machine +shield +shoe shop +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule +sliding door +slot +snorkel +snowmobile +snowplow +soap dispenser +soccer ball +sock +solar dish +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web +spindle +sports car +spotlight +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch +stove +strainer +streetcar +stretcher +studio couch +stupa +submarine +suit +sundial +sunglass +sunglasses +sunscreen +suspension bridge +swab +sweatshirt +swimming trunks +swing +switch +syringe +table lamp +tank +tape player +teapot +teddy +television +tennis ball +thatch +theater curtain +thimble +thresher +throne +tile roof +toaster +tobacco shop +toilet seat +torch +totem pole +tow truck +toyshop +tractor +trailer truck +tray +trench coat +tricycle +trimaran +tripod +triumphal arch +trolleybus +trombone +tub +turnstile +typewriter keyboard +umbrella +unicycle +upright +vacuum +vase +vault +velvet +vending machine +vestment +viaduct +violin +volleyball +waffle iron +wall clock +wallet +wardrobe +warplane +washbasin +washer +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool +worm fence +wreck +yawl +yurt +web site +comic book +crossword puzzle +street sign +traffic light +book jacket +menu +plate +guacamole +consomme +hot pot +trifle +ice cream +ice lolly +French loaf +bagel +pretzel +cheeseburger +hotdog +mashed potato +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +hay +carbonara +chocolate sauce +dough +meat loaf +pizza +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff +coral reef +geyser +lakeside +promontory +sandbar +seashore +valley +volcano +ballplayer +groom +scuba diver +rapeseed +daisy +yellow lady's slipper 
+corn +acorn +hip +buckeye +coral fungus +agaric +gyromitra +stinkhorn +earthstar +hen-of-the-woods +bolete +ear +toilet tissue diff --git a/qai_hub_models/labels/voc_labels.txt b/qai_hub_models/labels/voc_labels.txt new file mode 100644 index 00000000..c724f9aa --- /dev/null +++ b/qai_hub_models/labels/voc_labels.txt @@ -0,0 +1,21 @@ +BACKGROUND +aeroplane +bicycle +bird +boat +bottle +bus +car +cat +chair +cow +diningtable +dog +horse +motorbike +person +pottedplant +sheep +sofa +train +tvmonitor diff --git a/qai_hub_models/models/_shared/cityscapes_segmentation/app.py b/qai_hub_models/models/_shared/cityscapes_segmentation/app.py index ff51e7f7..03325642 100644 --- a/qai_hub_models/models/_shared/cityscapes_segmentation/app.py +++ b/qai_hub_models/models/_shared/cityscapes_segmentation/app.py @@ -5,7 +5,7 @@ from __future__ import annotations import os -from typing import Optional +from typing import Mapping, Optional, Tuple import numpy as np import torch @@ -25,6 +25,7 @@ MODEL_ID, ) from qai_hub_models.utils.asset_loaders import ASSET_CONFIG, SourceAsRoot +from qai_hub_models.utils.image_processing import pil_resize_pad, pil_undo_resize_pad def _load_cityscapes_loader(cityscapes_path: Optional[str] = None) -> object: @@ -92,9 +93,11 @@ class CityscapesSegmentationApp: def __init__( self, model: torch.nn.Module, + input_specs: Mapping[str, Tuple[Tuple[int, ...], str]], ): self.model = model self.color_mapping = _load_cityscapes_loader().dataset.color_mapping + (_, _, self.model_height, self.model_width) = input_specs["image"][0] def predict(self, image: Image, raw_output: bool = False) -> Image | np.ndarray: """ @@ -111,13 +114,18 @@ def predict(self, image: Image, raw_output: bool = False) -> Image | np.ndarray: WIDTH]. Note, that WIDTH and HEIGHT will be smaller than the input image. 
""" + resized_image, scale, padding = pil_resize_pad( + image, (self.model_height, self.model_width) + ) - input_tensor = preprocess_cityscapes_image(image) + input_tensor = preprocess_cityscapes_image(resized_image) with torch.no_grad(): small_res_output = self.model(input_tensor) output = F.interpolate( - small_res_output, (image.height, image.width), mode="bilinear" + small_res_output, + (resized_image.height, resized_image.width), + mode="bilinear", ) if raw_output: return output.detach().numpy() @@ -125,6 +133,9 @@ def predict(self, image: Image, raw_output: bool = False) -> Image | np.ndarray: color_mask = ImageModule.fromarray(predictions.astype(np.uint8)).convert("P") color_mask.putpalette(self.color_mapping) - out = ImageModule.blend(image, color_mask.convert("RGB"), 0.5) + out = ImageModule.blend(resized_image, color_mask.convert("RGB"), 0.5) + + # Resize / unpad annotated image + image_annotated = pil_undo_resize_pad(out, image.size, scale, padding) - return out + return image_annotated diff --git a/qai_hub_models/models/_shared/cityscapes_segmentation/demo.py b/qai_hub_models/models/_shared/cityscapes_segmentation/demo.py index 25921362..2bab019b 100644 --- a/qai_hub_models/models/_shared/cityscapes_segmentation/demo.py +++ b/qai_hub_models/models/_shared/cityscapes_segmentation/demo.py @@ -17,12 +17,12 @@ demo_model_from_cli_args, get_model_cli_parser, get_on_device_demo_parser, + input_spec_from_cli_args, validate_on_device_demo_args, ) from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image from qai_hub_models.utils.base_model import TargetRuntime from qai_hub_models.utils.display import display_or_save_image -from qai_hub_models.utils.image_processing import pil_resize_pad, pil_undo_resize_pad # This image showcases the Cityscapes classes (but is not from the dataset) TEST_CITYSCAPES_LIKE_IMAGE_NAME = "cityscapes_like_demo_2048x1024.jpg" @@ -58,22 +58,13 @@ def cityscapes_segmentation_demo( image = args.image image_name = os.path.basename(image) - input_spec = model_type.get_input_spec() - inference_model = demo_model_from_cli_args(model_type, model_id, args) - app = CityscapesSegmentationApp(inference_model) - - (_, _, height, width) = input_spec["image"][0] - orig_image = load_image(image) - image, scale, padding = pil_resize_pad(orig_image, (height, width)) + input_spec = input_spec_from_cli_args(inference_model, args) + app = CityscapesSegmentationApp(inference_model, input_spec) # Run app - image_annotated = app.predict(image) - - # Resize / unpad annotated image - image_annotated = pil_undo_resize_pad( - image_annotated, orig_image.size, scale, padding - ) + orig_image = load_image(image) + image_annotated = app.predict(orig_image) if not is_test: display_or_save_image( diff --git a/qai_hub_models/models/_shared/super_resolution/demo.py b/qai_hub_models/models/_shared/super_resolution/demo.py index 3ed3cb5d..f901b22d 100644 --- a/qai_hub_models/models/_shared/super_resolution/demo.py +++ b/qai_hub_models/models/_shared/super_resolution/demo.py @@ -8,23 +8,35 @@ from typing import List, Type from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp +from qai_hub_models.models._shared.super_resolution.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, +) from qai_hub_models.utils.args import ( demo_model_from_cli_args, get_model_cli_parser, get_on_device_demo_parser, validate_on_device_demo_args, ) -from qai_hub_models.utils.asset_loaders import CachedWebAsset, load_image +from qai_hub_models.utils.asset_loaders import ( + 
CachedWebAsset, + CachedWebModelAsset, + load_image, +) from qai_hub_models.utils.base_model import BaseModel, TargetRuntime from qai_hub_models.utils.display import display_or_save_image +IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "super_resolution_input.jpg" +) + # Run Super Resolution end-to-end on a sample image. # The demo will display both the input image and the higher resolution output. def super_resolution_demo( model_cls: Type[BaseModel], model_id: str, - default_image: str | CachedWebAsset, + default_image: str | CachedWebAsset = IMAGE_ADDRESS, is_test: bool = False, available_target_runtimes: List[TargetRuntime] = list( TargetRuntime.__members__.values() diff --git a/qai_hub_models/models/_shared/super_resolution/model.py b/qai_hub_models/models/_shared/super_resolution/model.py new file mode 100644 index 00000000..8042a4c1 --- /dev/null +++ b/qai_hub_models/models/_shared/super_resolution/model.py @@ -0,0 +1,67 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import torch + +from qai_hub_models.evaluators.base_evaluators import BaseEvaluator +from qai_hub_models.evaluators.superres_evaluator import SuperResolutionOutputEvaluator +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.input_spec import InputSpec + +MODEL_ID = __name__.split(".")[-2] +MODEL_ASSET_VERSION = 2 +DEFAULT_SCALE_FACTOR = 4 + + +def validate_scale_factor(scale_factor: int) -> None: + """Only these scales have pre-trained checkpoints available.""" + valid_scales = [2, 3, 4] + assert scale_factor in valid_scales, "`scale_factor` must be in : " + ", ".join( + valid_scales + ) + + +class SuperResolutionModel(BaseModel): + """Base Model for Super Resolution.""" + + def __init__( + self, + model: torch.nn.Module, + scale_factor: int, + ) -> None: + super().__init__() + self.model = model + self.scale_factor = scale_factor + + def get_evaluator(self) -> BaseEvaluator: + return SuperResolutionOutputEvaluator() + + def forward(self, image): + """ + Run Super Resolution on `image`, and produce an upscaled image + + Parameters: + image: Pixel values pre-processed for model consumption. + Range: float[0, 1] + 3-channel Color Space: RGB + + Returns: + image: Pixel values + Range: float[0, 1] + 3-channel Color Space: RGB + """ + return self.model(image) + + @staticmethod + def get_input_spec( + batch_size: int = 1, + num_channels: int = 3, + height: int = 128, + width: int = 128, + ) -> InputSpec: + # Get the input specification ordered (name -> (shape, type)) pairs for this model. + # + # This can be used with the qai_hub python API to declare + # the model input specification upon submitting a profile job. + return {"image": ((batch_size, num_channels, height, width), "float32")} diff --git a/qai_hub_models/models/aotgan/README.md b/qai_hub_models/models/aotgan/README.md index dd02b51a..89ec5bcb 100644 --- a/qai_hub_models/models/aotgan/README.md +++ b/qai_hub_models/models/aotgan/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of AOT-GAN can be found [here](https://github.com/taki0112/AttnGAN-Tensorflow/blob/master/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Aggregated Contextual Transformations for High-Resolution Image Inpainting](https://arxiv.org/abs/2104.01431) diff --git a/qai_hub_models/models/aotgan/export.py b/qai_hub_models/models/aotgan/export.py index e86faccd..ab8656d4 100644 --- a/qai_hub_models/models/aotgan/export.py +++ b/qai_hub_models/models/aotgan/export.py @@ -190,7 +190,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/aotgan/perf.yaml b/qai_hub_models/models/aotgan/perf.yaml index db1d414b..98f6395e 100644 --- a/qai_hub_models/models/aotgan/perf.yaml +++ b/qai_hub_models/models/aotgan/perf.yaml @@ -36,11 +36,11 @@ models: - name: AOT-GAN performance_metrics: - torchscript_onnx_tflite: - inference_time: 164624.0 - throughput: 6.074448440081641 + inference_time: 164177.0 + throughput: 6.0909871662900406 estimated_peak_memory_range: - min: 5124096 - max: 8396488 + min: 3293184 + max: 6670400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j1glkw4ep + job_id: j1gle2z8p job_status: Passed torchscript_onnx_qnn: - inference_time: 165008.0 - throughput: 6.06031222728595 + inference_time: 165278.0 + throughput: 6.050412033059452 estimated_peak_memory_range: - min: 3850240 - max: 32305264 + min: 4321280 + max: 32279608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 275 - job_id: jwgovdz45 + job_id: jwgoen0xp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:39.212193Z' + timestamp: '2024-06-08T22:15:16Z' - torchscript_onnx_tflite: - inference_time: 120767.0 - throughput: 8.280407727276492 + inference_time: 120342.0 + throughput: 8.309650828472188 estimated_peak_memory_range: - min: 2646016 - max: 222181760 + min: 2510848 + max: 224329120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jw561o2vp + job_id: jw56qzj0g job_status: Passed torchscript_onnx_qnn: - inference_time: 121460.0 - throughput: 8.233163181294254 + inference_time: 121373.0 + throughput: 8.2390647013751 estimated_peak_memory_range: - min: 1572864 - max: 147148656 + min: 0 + max: 141486816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 275 - job_id: j1pvw2q7g + job_id: j1pvzrojg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:39.339992Z' + timestamp: '2024-06-08T22:15:17Z' - torchscript_onnx_tflite: - inference_time: 164352.0 - throughput: 6.084501557632398 + inference_time: 164129.0 + throughput: 
6.092768493075568 estimated_peak_memory_range: - min: 3293184 - max: 6536160 + min: 12288 + max: 2291528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,14 +124,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j1p3monxg + job_id: j1p3q13l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 164668.0 - throughput: 6.072825321252459 + inference_time: 164665.0 + throughput: 6.072935960890292 estimated_peak_memory_range: - min: 4333568 - max: 28875248 + min: 4337664 + max: 28704480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -139,7 +139,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 275 - job_id: jlpev6o75 + job_id: jlpe4w115 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -148,10 +148,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:39.469404Z' + timestamp: '2024-06-08T22:15:19Z' - torchscript_onnx_qnn: - inference_time: 145505.0 - throughput: 6.872616061303735 + inference_time: 145570.0 + throughput: 6.869547296833138 estimated_peak_memory_range: min: 4202496 max: 4202496 @@ -162,7 +162,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 275 - job_id: j7gjl3d7p + job_id: j7gjk2mx5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -171,4 +171,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:39.597982Z' + timestamp: '2024-06-08T22:15:18Z' diff --git a/qai_hub_models/models/baichuan_7b_quantized/README.md b/qai_hub_models/models/baichuan_7b_quantized/README.md index cff30e46..7a79da7a 100644 --- a/qai_hub_models/models/baichuan_7b_quantized/README.md +++ b/qai_hub_models/models/baichuan_7b_quantized/README.md @@ -20,7 +20,7 @@ a hosted Qualcomm® device. ## License - The license for the original implementation of Baichuan-7B can be found [here](https://github.com/baichuan-inc/Baichuan-7B/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/baichuan-inc/Baichuan-7B/blob/main/LICENSE) ## References * [Baichuan 2: Open Large-scale Language Models](https://arxiv.org/abs/2309.10305) diff --git a/qai_hub_models/models/common.py b/qai_hub_models/models/common.py index bc0886c5..fc75c06c 100644 --- a/qai_hub_models/models/common.py +++ b/qai_hub_models/models/common.py @@ -12,13 +12,16 @@ class TargetRuntime(Enum): TFLITE = 0 QNN = 1 ORT = 2 + PRECOMPILED_ORT = 3 def __str__(self): return self.name.lower() @property def long_name(self): - return f"torchscript_onnx_{self.name.lower()}" + if "precompiled" not in self.name.lower(): + return f"torchscript_onnx_{self.name.lower()}" + return f"{self.name.lower()}" class SourceModelFormat(Enum): diff --git a/qai_hub_models/models/controlnet_quantized/README.md b/qai_hub_models/models/controlnet_quantized/README.md index 2c155773..6e992553 100644 --- a/qai_hub_models/models/controlnet_quantized/README.md +++ b/qai_hub_models/models/controlnet_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ControlNet can be found [here](https://github.com/lllyasviel/ControlNet/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/lllyasviel/ControlNet/blob/main/LICENSE) ## References * [Adding Conditional Control to Text-to-Image Diffusion Models](https://arxiv.org/abs/2302.05543) diff --git a/qai_hub_models/models/convnext_tiny/README.md b/qai_hub_models/models/convnext_tiny/README.md index 9e71c767..0c961ecb 100644 --- a/qai_hub_models/models/convnext_tiny/README.md +++ b/qai_hub_models/models/convnext_tiny/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ConvNext-Tiny can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) diff --git a/qai_hub_models/models/convnext_tiny/evaluate.py b/qai_hub_models/models/convnext_tiny/evaluate.py new file mode 100644 index 00000000..1ae449fa --- /dev/null +++ b/qai_hub_models/models/convnext_tiny/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.convnext_tiny import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/convnext_tiny/export.py b/qai_hub_models/models/convnext_tiny/export.py index 7342e877..3e52ca90 100644 --- a/qai_hub_models/models/convnext_tiny/export.py +++ b/qai_hub_models/models/convnext_tiny/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/convnext_tiny/info.yaml b/qai_hub_models/models/convnext_tiny/info.yaml index f3e7e9e1..9bbe75fa 100644 --- a/qai_hub_models/models/convnext_tiny/info.yaml +++ b/qai_hub_models/models/convnext_tiny/info.yaml @@ -38,3 +38,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/convnext_tiny/perf.yaml b/qai_hub_models/models/convnext_tiny/perf.yaml index 80ea16f7..70048eca 100644 --- a/qai_hub_models/models/convnext_tiny/perf.yaml +++ b/qai_hub_models/models/convnext_tiny/perf.yaml @@ -36,11 +36,11 @@ models: - name: ConvNext-Tiny performance_metrics: - torchscript_onnx_tflite: - inference_time: 5749.0 - throughput: 173.94329448599757 + inference_time: 5717.0 + throughput: 174.91691446562882 estimated_peak_memory_range: - min: 3657728 - max: 74915816 + min: 45056 + max: 2631376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jvgdv6nkg + job_id: jvgd7qleg job_status: Passed torchscript_onnx_qnn: - inference_time: 3812.0 - throughput: 262.3294858342078 + inference_time: 3769.0 + throughput: 265.32236667551075 estimated_peak_memory_range: - min: 12288 - max: 202348976 + min: 81920 + max: 202159384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: j0px109jg + job_id: j0pxe6215 job_status: Passed torchscript_onnx_ort: - inference_time: 16318.0 - throughput: 61.282019855374436 + inference_time: 16427.0 + throughput: 60.875388080599016 estimated_peak_memory_range: - min: 16384 - max: 153639296 + min: 110592 + max: 152489568 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jep2mo0x5 + job_id: jep239n4g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:39.628526Z' + timestamp: '2024-06-08T22:16:04Z' - torchscript_onnx_tflite: - inference_time: 3967.0 - throughput: 252.07965717166624 + inference_time: 3988.0 + throughput: 250.75225677031094 estimated_peak_memory_range: min: 16384 - max: 211805024 + max: 212477920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jz57do2q5 + job_id: jz57vl3l5 job_status: Passed torchscript_onnx_qnn: inference_time: 2732.0 throughput: 366.03221083455344 estimated_peak_memory_range: - min: 0 - max: 90889216 + min: 741376 + max: 87297136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jo5mz9eyp + job_id: jo5mv6yw5 job_status: Passed torchscript_onnx_ort: - inference_time: 11793.0 - throughput: 84.79606546256254 + inference_time: 11884.0 + throughput: 84.14675193537529 estimated_peak_memory_range: - min: 618496 - max: 60160448 + min: 139571200 + max: 200346752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jqpyd8rrp + job_id: jqpyvj07p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:39.628637Z' + timestamp: '2024-06-08T22:16:05Z' - torchscript_onnx_tflite: - inference_time: 5705.0 - throughput: 175.28483786152498 + inference_time: 5701.0 + throughput: 175.40782318891422 estimated_peak_memory_range: - min: 65536 - max: 2353464 + min: 49152 + max: 2985728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jqp4wenqg + job_id: jqp4jd0vp job_status: Passed torchscript_onnx_qnn: - inference_time: 3788.0 - throughput: 263.99155227032736 + inference_time: 3779.0 + throughput: 264.6202699126753 estimated_peak_memory_range: - min: 86016 - max: 201872936 + min: 94208 + max: 182002576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jopryx8vg + job_id: jopr12j9g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:39.628718Z' + timestamp: '2024-06-08T22:16:03Z' - torchscript_onnx_qnn: - inference_time: 3927.0 - throughput: 254.64731347084287 + inference_time: 3907.0 + throughput: 255.9508574353724 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jegne1lvg + job_id: jegnrm8r5 job_status: Passed torchscript_onnx_ort: - inference_time: 17066.0 - throughput: 58.59603890776984 + inference_time: 16908.0 + throughput: 59.143600662408325 estimated_peak_memory_range: - min: 449466368 - max: 449466368 + min: 294563840 + max: 294563840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 189 - job_id: j2p0ro32p + job_id: j2p0e2765 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:39.628784Z' + timestamp: '2024-06-08T22:16:06Z' diff --git a/qai_hub_models/models/convnext_tiny_w8a16_quantized/README.md b/qai_hub_models/models/convnext_tiny_w8a16_quantized/README.md index b7a7d0e7..a62f06f9 100644 --- a/qai_hub_models/models/convnext_tiny_w8a16_quantized/README.md +++ b/qai_hub_models/models/convnext_tiny_w8a16_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ConvNext-Tiny-w8a16-Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) diff --git a/qai_hub_models/models/convnext_tiny_w8a16_quantized/evaluate.py b/qai_hub_models/models/convnext_tiny_w8a16_quantized/evaluate.py new file mode 100644 index 00000000..362002ba --- /dev/null +++ b/qai_hub_models/models/convnext_tiny_w8a16_quantized/evaluate.py @@ -0,0 +1,64 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.convnext_tiny_w8a16_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + supports_tflite=False, + supports_ort=False, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/convnext_tiny_w8a16_quantized/export.py b/qai_hub_models/models/convnext_tiny_w8a16_quantized/export.py index beeafaae..39153360 100644 --- a/qai_hub_models/models/convnext_tiny_w8a16_quantized/export.py +++ b/qai_hub_models/models/convnext_tiny_w8a16_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -227,7 +227,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_ort=False) + parser = export_parser(model_cls=Model, supports_tflite=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/convnext_tiny_w8a16_quantized/info.yaml b/qai_hub_models/models/convnext_tiny_w8a16_quantized/info.yaml index 5370c05d..be8bde1b 100644 --- a/qai_hub_models/models/convnext_tiny_w8a16_quantized/info.yaml +++ b/qai_hub_models/models/convnext_tiny_w8a16_quantized/info.yaml @@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml b/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml index 0ae7d848..431c0b4f 100644 --- a/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml +++ b/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml @@ -8,10 +8,8 @@ aggregated: - Google Pixel 4 - Google Pixel 4a - Google Pixel 5a 5G - - QCS6490 (Proxy) - QCS8250 (Proxy) - QCS8550 (Proxy) - - RB3 Gen 2 (Proxy) - RB5 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra @@ -30,7 +28,6 @@ aggregated: - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - - 
Qcs6490 - Qcs8250 - Qcs8550 - Snapdragon® 8 Gen 1 @@ -42,11 +39,11 @@ models: - name: ConvNext-Tiny-w8a16-Quantized performance_metrics: - torchscript_onnx_qnn: - inference_time: 3253.0 - throughput: 307.40854595757764 + inference_time: 3272.0 + throughput: 305.6234718826406 estimated_peak_memory_range: - min: 12288 - max: 138858328 + min: 323584 + max: 8383168 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,7 +51,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jw561oevp + job_id: jogkrqm25 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -63,13 +60,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:39.928338Z' + timestamp: '2024-06-08T22:16:49Z' - torchscript_onnx_qnn: - inference_time: 2291.0 - throughput: 436.4906154517678 + inference_time: 2286.0 + throughput: 437.4453193350831 estimated_peak_memory_range: - min: 315392 - max: 90690416 + min: 0 + max: 90112528 primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +74,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j1p3movxg + job_id: jn5q9ro4p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -86,13 +83,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:39.928544Z' + timestamp: '2024-06-08T22:16:50Z' - torchscript_onnx_qnn: - inference_time: 3277.0 - throughput: 305.15715593530666 + inference_time: 3255.0 + throughput: 307.21966205837174 estimated_peak_memory_range: - min: 319488 - max: 8809304 + min: 16384 + max: 11232112 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,7 +97,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j1pvw207g + job_id: jw56qzl0g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -109,44 +106,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:39.928727Z' - - torchscript_onnx_qnn: - inference_time: 11165.0 - throughput: 89.56560680698611 - estimated_peak_memory_range: - min: 323584 - max: 98049920 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 215 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 215 - job_id: j7gjl3z7p - job_status: Passed - reference_device_info: - name: RB3 Gen 2 (Proxy) - os: '12' - form_factor: Iot - os_name: Android - manufacturer: Qualcomm - chipset: Qcs6490 - timestamp: '2024-05-29T18:59:40.073528Z' - - reference_device_info: - name: RB5 (Proxy) - os: '12' - form_factor: Iot - os_name: Android - manufacturer: Qualcomm - chipset: Qcs8250 - timestamp: '2024-05-29T18:59:40.202310Z' + timestamp: '2024-06-08T22:16:52Z' - torchscript_onnx_qnn: - inference_time: 3621.0 - throughput: 276.16680475006905 + inference_time: 3567.0 + throughput: 280.3476310625175 estimated_peak_memory_range: - min: 417792 - max: 417792 + min: 221184 + max: 221184 primary_compute_unit: NPU precision: int8 layer_info: @@ -154,7 +120,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jwgovdk45 + job_id: j1gle2r8p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -163,4 +129,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.203221Z' + timestamp: '2024-06-08T22:16:51Z' diff --git a/qai_hub_models/models/convnext_tiny_w8a8_quantized/README.md b/qai_hub_models/models/convnext_tiny_w8a8_quantized/README.md index 35d6bf68..d913e14e 100644 
--- a/qai_hub_models/models/convnext_tiny_w8a8_quantized/README.md +++ b/qai_hub_models/models/convnext_tiny_w8a8_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ConvNext-Tiny-w8a8-Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) diff --git a/qai_hub_models/models/convnext_tiny_w8a8_quantized/evaluate.py b/qai_hub_models/models/convnext_tiny_w8a8_quantized/evaluate.py new file mode 100644 index 00000000..76c29397 --- /dev/null +++ b/qai_hub_models/models/convnext_tiny_w8a8_quantized/evaluate.py @@ -0,0 +1,64 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.convnext_tiny_w8a8_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + supports_tflite=False, + supports_ort=False, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/convnext_tiny_w8a8_quantized/export.py b/qai_hub_models/models/convnext_tiny_w8a8_quantized/export.py index 787f0f4f..54c73379 100644 --- a/qai_hub_models/models/convnext_tiny_w8a8_quantized/export.py +++ b/qai_hub_models/models/convnext_tiny_w8a8_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" 
os.makedirs(output_path, exist_ok=True) @@ -227,7 +227,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_ort=False) + parser = export_parser(model_cls=Model, supports_tflite=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/convnext_tiny_w8a8_quantized/info.yaml b/qai_hub_models/models/convnext_tiny_w8a8_quantized/info.yaml index b3770255..a27d24b3 100644 --- a/qai_hub_models/models/convnext_tiny_w8a8_quantized/info.yaml +++ b/qai_hub_models/models/convnext_tiny_w8a8_quantized/info.yaml @@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml b/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml index 08bb11f7..cc741e4a 100644 --- a/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml +++ b/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml @@ -8,10 +8,8 @@ aggregated: - Google Pixel 4 - Google Pixel 4a - Google Pixel 5a 5G - - QCS6490 (Proxy) - QCS8250 (Proxy) - QCS8550 (Proxy) - - RB3 Gen 2 (Proxy) - RB5 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra @@ -30,7 +28,6 @@ aggregated: - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - - Qcs6490 - Qcs8250 - Qcs8550 - Snapdragon® 8 Gen 1 @@ -42,11 +39,11 @@ models: - name: ConvNext-Tiny-w8a8-Quantized performance_metrics: - torchscript_onnx_qnn: - inference_time: 1732.0 - throughput: 577.3672055427252 + inference_time: 1723.0 + throughput: 580.3830528148578 estimated_peak_memory_range: - min: 28672 - max: 12392608 + min: 12288 + max: 127334120 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,7 +51,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jvgdv6ekg + job_id: jygzvjkkp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -63,13 +60,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.231037Z' + timestamp: '2024-06-08T22:17:38Z' - torchscript_onnx_qnn: - inference_time: 1204.0 - throughput: 830.5647840531561 + inference_time: 1207.0 + throughput: 828.5004142502071 estimated_peak_memory_range: - min: 163840 - max: 89393856 + min: 12288 + max: 87553664 primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +74,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jz57do0q5 + job_id: jz5wmq66g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -86,13 +83,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.231081Z' + timestamp: '2024-06-08T22:17:39Z' - torchscript_onnx_qnn: - inference_time: 1729.0 - throughput: 578.368999421631 + inference_time: 1724.0 + throughput: 580.046403712297 estimated_peak_memory_range: - min: 172032 - max: 8506368 + min: 20480 + max: 10474536 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,7 +97,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j0px10njg + job_id: jnp1qez2g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -109,13 +106,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.231120Z' + timestamp: '2024-06-08T22:17:41Z' - torchscript_onnx_qnn: - inference_time: 6345.0 - throughput: 157.60441292356185 + inference_time: 1917.0 + throughput: 521.6484089723526 
estimated_peak_memory_range: - min: 163840 - max: 87882064 + min: 503808 + max: 503808 primary_compute_unit: NPU precision: int8 layer_info: @@ -123,38 +120,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jo5mz9qyp - job_status: Passed - reference_device_info: - name: RB3 Gen 2 (Proxy) - os: '12' - form_factor: Iot - os_name: Android - manufacturer: Qualcomm - chipset: Qcs6490 - timestamp: '2024-05-29T18:59:40.231157Z' - - reference_device_info: - name: RB5 (Proxy) - os: '12' - form_factor: Iot - os_name: Android - manufacturer: Qualcomm - chipset: Qcs8250 - timestamp: '2024-05-29T18:59:40.231162Z' - - torchscript_onnx_qnn: - inference_time: 1931.0 - throughput: 517.8663904712585 - estimated_peak_memory_range: - min: 499712 - max: 499712 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 215 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 215 - job_id: jqp4wekqg + job_id: jmg99wnlg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -163,4 +129,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.231201Z' + timestamp: '2024-06-08T22:17:40Z' diff --git a/qai_hub_models/models/ddrnet23_slim/README.md b/qai_hub_models/models/ddrnet23_slim/README.md index 22b47996..a72d09f7 100644 --- a/qai_hub_models/models/ddrnet23_slim/README.md +++ b/qai_hub_models/models/ddrnet23_slim/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DDRNet23-Slim can be found [here](https://github.com/chenjun2hao/DDRNet.pytorch/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation of Road Scenes](https://arxiv.org/abs/2101.06085) diff --git a/qai_hub_models/models/ddrnet23_slim/export.py b/qai_hub_models/models/ddrnet23_slim/export.py index 8ed00b91..6f64655c 100644 --- a/qai_hub_models/models/ddrnet23_slim/export.py +++ b/qai_hub_models/models/ddrnet23_slim/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -224,7 +224,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ddrnet23_slim/info.yaml b/qai_hub_models/models/ddrnet23_slim/info.yaml index cf0776a3..02c624fd 100644 --- a/qai_hub_models/models/ddrnet23_slim/info.yaml +++ b/qai_hub_models/models/ddrnet23_slim/info.yaml @@ -37,3 +37,4 @@ license_type: mit deploy_license_type: AI Model Hub License dataset: - cityscapes +labels_file: cityscapes_labels.txt diff --git a/qai_hub_models/models/ddrnet23_slim/perf.yaml b/qai_hub_models/models/ddrnet23_slim/perf.yaml index 62939a16..0bb7c379 100644 --- 
a/qai_hub_models/models/ddrnet23_slim/perf.yaml +++ b/qai_hub_models/models/ddrnet23_slim/perf.yaml @@ -36,11 +36,11 @@ models: - name: DDRNet23-Slim performance_metrics: - torchscript_onnx_tflite: - inference_time: 6730.0 - throughput: 148.5884101040119 + inference_time: 6650.0 + throughput: 150.37593984962405 estimated_peak_memory_range: - min: 1011712 - max: 3811568 + min: 57344 + max: 27662296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: j2p0ro22p + job_id: jo5mv3xw5 job_status: Passed torchscript_onnx_ort: - inference_time: 9468.0 - throughput: 105.61892691170257 + inference_time: 9735.0 + throughput: 102.7221366204417 estimated_peak_memory_range: - min: 13135872 - max: 50337872 + min: 12599296 + max: 48937112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 155 - job_id: jn5q24r75 + job_id: jep23ly4g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.256306Z' + timestamp: '2024-06-08T22:18:08Z' - torchscript_onnx_tflite: - inference_time: 4767.0 - throughput: 209.77554017201595 + inference_time: 4742.0 + throughput: 210.88148460565162 estimated_peak_memory_range: min: 16384 - max: 73077936 + max: 73234384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: j1p87jmz5 + job_id: jegnr3vr5 job_status: Passed torchscript_onnx_ort: - inference_time: 6106.0 - throughput: 163.77333770062233 + inference_time: 6012.0 + throughput: 166.333998669328 estimated_peak_memory_range: - min: 327680 - max: 40134176 + min: 524288 + max: 42757008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 155 - job_id: j1glkw2ep + job_id: jqpyv637p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.256364Z' + timestamp: '2024-06-08T22:18:09Z' - torchscript_onnx_tflite: - inference_time: 6723.0 - throughput: 148.74312063067083 + inference_time: 6672.0 + throughput: 149.8800959232614 estimated_peak_memory_range: - min: 1007616 - max: 3000632 + min: 991232 + max: 15704000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jogky6qyp + job_id: jopr1e39g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.256392Z' + timestamp: '2024-06-08T22:18:06Z' - torchscript_onnx_ort: - inference_time: 9545.0 - throughput: 104.76689366160294 + inference_time: 9609.0 + throughput: 104.06910188365075 estimated_peak_memory_range: - min: 13291520 - max: 13291520 + min: 9854976 + max: 9854976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 155 - job_id: jw561ozvp + job_id: j2p0el065 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.256424Z' + timestamp: 
'2024-06-08T22:18:10Z' diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet/README.md b/qai_hub_models/models/deeplabv3_plus_mobilenet/README.md index 49ab4d78..96ebfd8f 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet/README.md +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DeepLabV3-Plus-MobileNet can be found [here](https://github.com/jfzhang95/pytorch-deeplab-xception/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Rethinking Atrous Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1706.05587) diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet/export.py b/qai_hub_models/models/deeplabv3_plus_mobilenet/export.py index 08966976..e1a7394c 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet/export.py +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet/info.yaml b/qai_hub_models/models/deeplabv3_plus_mobilenet/info.yaml index ca033e0e..a8ca02ac 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet/info.yaml +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet/info.yaml @@ -35,3 +35,4 @@ license_type: mit deploy_license_type: AI Model Hub License dataset: - VOC2012 +labels_file: voc_labels.txt diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml b/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml index 43963ab9..dc3430eb 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml @@ -36,11 +36,11 @@ models: - name: DeepLabV3-Plus-MobileNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 13093.0 - throughput: 76.37668983426258 + inference_time: 13047.0 + throughput: 76.64597225415804 estimated_peak_memory_range: min: 21032960 - max: 67752032 + max: 22679264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 98 - job_id: jwgovdn45 + job_id: jogkr3x25 job_status: Passed torchscript_onnx_qnn: - inference_time: 12869.0 - throughput: 77.70611547128759 + inference_time: 12852.0 + throughput: 77.8089013383131 estimated_peak_memory_range: - min: 3112960 - max: 21269072 + min: 4210688 + max: 20359032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: jlpev6w75 + job_id: jw56qn40g job_status: Passed torchscript_onnx_ort: - inference_time: 17611.0 - throughput: 56.78269263528477 + inference_time: 17763.0 + throughput: 56.296796712267074 estimated_peak_memory_range: - min: 44322816 - max: 75546832 + min: 40357888 + max: 70272240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jnp18owkg + job_id: j7gjkenx5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.282247Z' + timestamp: '2024-06-08T22:18:39Z' - torchscript_onnx_tflite: - inference_time: 9834.0 - throughput: 101.6880211511084 + inference_time: 9612.0 + throughput: 104.03662089055348 estimated_peak_memory_range: - min: 20480 - max: 67577072 + min: 32768 + max: 69905408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 98 - job_id: j1pvw2r7g + job_id: jn5q93q4p job_status: Passed torchscript_onnx_qnn: - inference_time: 9421.0 - throughput: 106.14584439019212 + inference_time: 9482.0 + throughput: 105.4629824931449 estimated_peak_memory_range: - min: 3194880 - max: 58526144 + min: 3174400 + max: 58616848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: jygz7zjzp + job_id: j1p3qe0l5 job_status: Passed torchscript_onnx_ort: - inference_time: 14395.0 - throughput: 69.46856547412295 + inference_time: 13976.0 + throughput: 71.55123068116771 estimated_peak_memory_range: - min: 28495872 - max: 65141056 + min: 53886976 + max: 88707568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jvgdv6qkg + job_id: jlpe4km15 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.282320Z' + timestamp: '2024-06-08T22:18:40Z' - torchscript_onnx_tflite: - inference_time: 13172.0 - throughput: 75.91861524445794 + inference_time: 13150.0 + throughput: 76.04562737642586 estimated_peak_memory_range: - min: 22175744 - max: 38738920 + min: 22147072 + max: 24149720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 98 - job_id: j7gjl327p + job_id: j1gle3m8p job_status: Passed torchscript_onnx_qnn: - inference_time: 12913.0 - throughput: 77.44133818632386 + inference_time: 12879.0 + throughput: 77.64577995185962 estimated_peak_memory_range: - min: 3186688 - max: 18745072 + min: 3198976 + max: 19885424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: jmg94oyq5 + job_id: j1pvzvkjg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.282366Z' + timestamp: '2024-06-08T22:18:38Z' - torchscript_onnx_qnn: - inference_time: 16505.0 - throughput: 60.58770069675856 + inference_time: 16510.0 + throughput: 60.56935190793458 estimated_peak_memory_range: min: 3170304 max: 3170304 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: jz5w9y3zp + job_id: jwgoe36xp job_status: Passed torchscript_onnx_ort: - inference_time: 16741.0 - throughput: 59.73358819664297 + inference_time: 16653.0 + throughput: 60.04924037710923 estimated_peak_memory_range: - min: 102998016 - max: 102998016 + min: 105144320 + max: 105144320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jz5w9y3jp + job_id: jygzvrdkp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.282417Z' + timestamp: '2024-06-08T22:18:41Z' diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/README.md b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/README.md index 9ee23f8b..79770236 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/README.md +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DeepLabV3-Plus-MobileNet-Quantized can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Rethinking Atrous Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1706.05587) diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py index 95facb42..88702832 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py @@ -195,7 +195,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/info.yaml b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/info.yaml index 80402e21..d8a6ff9e 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/info.yaml +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/info.yaml @@ -37,3 +37,4 @@ license_type: mit deploy_license_type: AI Model Hub License dataset: - VOC2012 +labels_file: voc_labels.txt diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml index 76310879..d4f699a6 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml @@ -42,26 +42,26 @@ models: - name: DeepLabV3-Plus-MobileNet-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 3331.0 - throughput: 300.2101471029721 + inference_time: 3596.0 + throughput: 278.08676307007784 estimated_peak_memory_range: - min: 12288 - max: 3077704 + min: 16384 + max: 1830768 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 99 + layers_on_npu: 102 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 99 - job_id: jnp18owlg + total_layers: 102 + job_id: jmg99wllg job_status: Passed torchscript_onnx_qnn: - inference_time: 5345.0 - throughput: 187.0907390084191 + inference_time: 5322.0 + throughput: 187.89928598271325 estimated_peak_memory_range: - min: 172032 - max: 46772064 + 
min: 806912 + max: 7295144 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jo5mz93qp + job_id: jnp1qe48g job_status: Passed torchscript_onnx_ort: - inference_time: 18725.0 - throughput: 53.4045393858478 + inference_time: 16175.0 + throughput: 61.82380216383308 estimated_peak_memory_range: - min: 114765824 - max: 132619664 + min: 42803200 + max: 54255496 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 122 + layers_on_npu: 120 layers_on_gpu: 0 layers_on_cpu: 51 - total_layers: 173 - job_id: j2p0rolep + total_layers: 171 + job_id: j0pxeyk35 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,28 +93,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.316733Z' + timestamp: '2024-06-08T22:19:29Z' - torchscript_onnx_tflite: - inference_time: 2493.0 - throughput: 401.1231448054553 + inference_time: 2668.0 + throughput: 374.8125937031484 estimated_peak_memory_range: min: 12288 - max: 57316816 + max: 60104416 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 99 + layers_on_npu: 102 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 99 - job_id: jvgdv6qlg + total_layers: 102 + job_id: jnp1qe42g job_status: Passed torchscript_onnx_qnn: - inference_time: 3954.0 - throughput: 252.90844714213455 + inference_time: 3937.0 + throughput: 254.00050800101602 estimated_peak_memory_range: min: 802816 - max: 61326896 + max: 61474288 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jegne13mg + job_id: jvgd7oxrg job_status: Passed torchscript_onnx_ort: - inference_time: 13415.0 - throughput: 74.54342154304882 + inference_time: 12210.0 + throughput: 81.9000819000819 estimated_peak_memory_range: - min: 112525312 - max: 167003632 + min: 33013760 + max: 87227648 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 122 + layers_on_npu: 120 layers_on_gpu: 0 layers_on_cpu: 51 - total_layers: 173 - job_id: j1p87jz85 + total_layers: 171 + job_id: jo5mv3nd5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,28 +146,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.316808Z' + timestamp: '2024-06-08T22:19:30Z' - torchscript_onnx_tflite: - inference_time: 3344.0 - throughput: 299.0430622009569 + inference_time: 3596.0 + throughput: 278.08676307007784 estimated_peak_memory_range: min: 12288 - max: 1744048 + max: 8750088 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 99 + layers_on_npu: 102 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 99 - job_id: jz57doxr5 + total_layers: 102 + job_id: jvgd7oxeg job_status: Passed torchscript_onnx_qnn: - inference_time: 5365.0 - throughput: 186.39328984156572 + inference_time: 5333.0 + throughput: 187.51171948246764 estimated_peak_memory_range: - min: 950272 - max: 81304632 + min: 20480 + max: 12661968 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jep2molm5 + job_id: jqp4jvl8p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,36 +184,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.316853Z' + timestamp: '2024-06-08T22:19:28Z' - torchscript_onnx_tflite: - inference_time: 
15002.0 - throughput: 66.65777896280495 + inference_time: 14989.0 + throughput: 66.71559143371806 estimated_peak_memory_range: - min: 5537792 - max: 44275008 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 99 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 99 - job_id: jqp4wevlg - job_status: Passed - torchscript_onnx_qnn: - inference_time: 19890.0 - throughput: 50.27652086475616 - estimated_peak_memory_range: - min: 839680 - max: 50580512 + min: 12288 + max: 39155056 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 100 + layers_on_npu: 102 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 100 - job_id: jqpyd864p + total_layers: 102 + job_id: jz5wmqe3g job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,21 +207,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:40.316896Z' + timestamp: '2024-06-08T22:19:23Z' - torchscript_onnx_tflite: - inference_time: 120249.0 - throughput: 8.316077472577735 + inference_time: 126163.0 + throughput: 7.926254131559966 estimated_peak_memory_range: - min: 11714560 - max: 29650216 + min: 11575296 + max: 14463800 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 96 + layers_on_npu: 99 layers_on_gpu: 3 layers_on_cpu: 0 - total_layers: 99 - job_id: j0px10y9g + total_layers: 102 + job_id: jmg99wlwg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:40.316921Z' + timestamp: '2024-06-08T22:19:24Z' - torchscript_onnx_qnn: - inference_time: 5376.0 - throughput: 186.01190476190476 + inference_time: 5241.0 + throughput: 190.80328181644725 estimated_peak_memory_range: - min: 790528 - max: 790528 + min: 798720 + max: 798720 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jopryxeeg + job_id: jz57vxyv5 job_status: Passed torchscript_onnx_ort: - inference_time: 33438.0 - throughput: 29.906094862132903 + inference_time: 22921.0 + throughput: 43.628113956633655 estimated_peak_memory_range: - min: 131166208 - max: 131166208 + min: 59097088 + max: 59097088 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 122 + layers_on_npu: 120 layers_on_gpu: 0 layers_on_cpu: 51 - total_layers: 173 - job_id: jogky63op + total_layers: 171 + job_id: jegnr36k5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.316975Z' + timestamp: '2024-06-08T22:19:31Z' diff --git a/qai_hub_models/models/deeplabv3_resnet50/README.md b/qai_hub_models/models/deeplabv3_resnet50/README.md index 1fec3ba3..c7cf9fab 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/README.md +++ b/qai_hub_models/models/deeplabv3_resnet50/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DeepLabV3-ResNet50 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Rethinking Atrous Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1706.05587) diff --git a/qai_hub_models/models/deeplabv3_resnet50/export.py b/qai_hub_models/models/deeplabv3_resnet50/export.py index b73993c7..1f3fda9b 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/export.py +++ b/qai_hub_models/models/deeplabv3_resnet50/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -225,7 +225,12 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) + parser = export_parser( + model_cls=Model, + supports_qnn=False, + supports_ort=False, + supports_precompiled_ort=False, + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/deeplabv3_resnet50/info.yaml b/qai_hub_models/models/deeplabv3_resnet50/info.yaml index 6ae16371..ca44a6e5 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/info.yaml +++ b/qai_hub_models/models/deeplabv3_resnet50/info.yaml @@ -35,3 +35,4 @@ has_animated_banner: yes license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: [] +labels_file: voc_labels.txt diff --git a/qai_hub_models/models/deeplabv3_resnet50/perf.yaml b/qai_hub_models/models/deeplabv3_resnet50/perf.yaml index 5f4591d3..a21c01a6 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/perf.yaml +++ b/qai_hub_models/models/deeplabv3_resnet50/perf.yaml @@ -36,11 +36,11 @@ models: - name: DeepLabV3-ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 290505.0 - throughput: 3.4422815442075008 + inference_time: 292980.0 + throughput: 3.413202266366305 estimated_peak_memory_range: - min: 4493312 - max: 181829312 + min: 2162688 + max: 149701296 primary_compute_unit: GPU precision: fp16 layer_info: @@ -48,7 +48,7 @@ models: layers_on_gpu: 95 layers_on_cpu: 0 total_layers: 95 - job_id: j1glkw3lp + job_id: jep23lkrg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -57,13 +57,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.360596Z' + timestamp: '2024-06-08T22:19:58Z' - torchscript_onnx_tflite: - inference_time: 217580.0 - throughput: 4.596010662744738 + inference_time: 223885.0 + throughput: 4.466578823949796 estimated_peak_memory_range: - min: 20635648 - max: 55850720 + min: 65536 + max: 32739680 primary_compute_unit: GPU precision: fp16 layer_info: @@ -71,7 +71,7 @@ models: layers_on_gpu: 95 layers_on_cpu: 0 total_layers: 95 - job_id: jw561on7p + job_id: jqpyv618p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -80,13 +80,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.360628Z' + timestamp: '2024-06-08T22:19:59Z' - torchscript_onnx_tflite: - inference_time: 293257.0 - throughput: 3.4099782784383663 + inference_time: 291243.0 + 
throughput: 3.4335589181542563 estimated_peak_memory_range: - min: 2183168 - max: 149075440 + min: 5476352 + max: 182706000 primary_compute_unit: GPU precision: fp16 layer_info: @@ -94,7 +94,7 @@ models: layers_on_gpu: 95 layers_on_cpu: 0 total_layers: 95 - job_id: j1p3moezg + job_id: j2p0elz95 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -103,12 +103,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.360656Z' - - reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows - manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.360664Z' + timestamp: '2024-06-08T22:20:00Z' diff --git a/qai_hub_models/models/densenet121/README.md b/qai_hub_models/models/densenet121/README.md index 1f95a118..a4221d1c 100644 --- a/qai_hub_models/models/densenet121/README.md +++ b/qai_hub_models/models/densenet121/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DenseNet-121 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Densely Connected Convolutional Networks](https://arxiv.org/abs/1608.06993) diff --git a/qai_hub_models/models/densenet121/evaluate.py b/qai_hub_models/models/densenet121/evaluate.py new file mode 100644 index 00000000..64361a06 --- /dev/null +++ b/qai_hub_models/models/densenet121/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.densenet121 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/densenet121/export.py b/qai_hub_models/models/densenet121/export.py index 9e6eb456..9f0c3592 100644 --- a/qai_hub_models/models/densenet121/export.py +++ b/qai_hub_models/models/densenet121/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/densenet121/info.yaml b/qai_hub_models/models/densenet121/info.yaml index 9d1dda3b..a14749dd 100644 --- a/qai_hub_models/models/densenet121/info.yaml +++ b/qai_hub_models/models/densenet121/info.yaml @@ -39,3 +39,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/densenet121/perf.yaml b/qai_hub_models/models/densenet121/perf.yaml index 3b6037e6..4cfd87c1 100644 --- a/qai_hub_models/models/densenet121/perf.yaml +++ b/qai_hub_models/models/densenet121/perf.yaml @@ -36,11 +36,11 @@ models: - name: DenseNet-121 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1945.0 - throughput: 514.1388174807198 + inference_time: 1946.0 + throughput: 513.874614594039 estimated_peak_memory_range: - min: 12288 - max: 20617520 + min: 20480 + max: 2555328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jnp18oelg + job_id: jygzvr2op job_status: Passed torchscript_onnx_qnn: - inference_time: 1983.0 - throughput: 504.2864346949067 + inference_time: 1998.0 + throughput: 500.5005005005005 estimated_peak_memory_range: - min: 12288 - max: 29686216 + min: 647168 + max: 7884416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jqp4wewlg + job_id: jnp1qe28g job_status: Passed torchscript_onnx_ort: - inference_time: 1988.0 - throughput: 503.01810865191146 + inference_time: 1954.0 + throughput: 511.77072671443193 estimated_peak_memory_range: - min: 12288 - max: 45851224 + min: 16384 + max: 41751336 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jopryxyeg + job_id: j0pxey935 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.378424Z' + timestamp: '2024-06-08T22:20:34Z' - torchscript_onnx_tflite: inference_time: 1318.0 throughput: 758.7253414264036 estimated_peak_memory_range: min: 12288 - max: 96176016 + max: 96529440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jvgdv6olg + job_id: jz5wmqw3g job_status: Passed torchscript_onnx_qnn: - inference_time: 1322.0 - throughput: 756.4296520423601 + inference_time: 1329.0 + throughput: 752.4454477050414 estimated_peak_memory_range: - min: 638976 - max: 160369936 + min: 618496 + max: 158201904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: j0px1019g + job_id: jvgd7onrg job_status: Passed torchscript_onnx_ort: - inference_time: 1313.0 - throughput: 761.6146230007616 + inference_time: 1326.0 + throughput: 754.1478129713424 estimated_peak_memory_range: - min: 0 - max: 51247376 + min: 618496 + max: 52734944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jep2momm5 + job_id: jo5mv3ed5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.378568Z' + timestamp: '2024-06-08T22:20:35Z' - torchscript_onnx_tflite: - inference_time: 1941.0 - throughput: 515.1983513652756 + inference_time: 1932.0 + throughput: 517.5983436853002 estimated_peak_memory_range: - min: 12288 - max: 3049672 + min: 24576 + max: 2118480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jz57dodr5 + job_id: jmg99w0wg job_status: Passed torchscript_onnx_qnn: - inference_time: 1994.0 - throughput: 501.5045135406219 + inference_time: 1991.0 + throughput: 502.26017076845807 estimated_peak_memory_range: - min: 630784 - max: 6220120 + min: 16384 + max: 39662792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jegne1emg + job_id: jqp4jvn8p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.378662Z' + timestamp: '2024-06-08T22:20:33Z' - torchscript_onnx_qnn: - inference_time: 2221.0 - throughput: 450.24763619990995 + inference_time: 2224.0 + throughput: 449.64028776978415 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jo5mz9zqp + job_id: jz57vx2v5 job_status: Passed torchscript_onnx_ort: - inference_time: 2070.0 - throughput: 483.09178743961354 + inference_time: 2023.0 + throughput: 494.3153732081068 estimated_peak_memory_range: - min: 634880 - max: 634880 + min: 647168 + max: 647168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jqpyd8d4p + job_id: jegnr30k5 job_status: 
Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.378764Z' + timestamp: '2024-06-08T22:20:36Z' diff --git a/qai_hub_models/models/detr_resnet101/README.md b/qai_hub_models/models/detr_resnet101/README.md index adaff2fc..662a86c6 100644 --- a/qai_hub_models/models/detr_resnet101/README.md +++ b/qai_hub_models/models/detr_resnet101/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DETR-ResNet101 can be found [here](https://github.com/facebookresearch/detr/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) diff --git a/qai_hub_models/models/detr_resnet101/export.py b/qai_hub_models/models/detr_resnet101/export.py index 863e0358..8456a642 100644 --- a/qai_hub_models/models/detr_resnet101/export.py +++ b/qai_hub_models/models/detr_resnet101/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -215,7 +215,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet101/perf.yaml b/qai_hub_models/models/detr_resnet101/perf.yaml index 3bfa8d81..ab111618 100644 --- a/qai_hub_models/models/detr_resnet101/perf.yaml +++ b/qai_hub_models/models/detr_resnet101/perf.yaml @@ -36,11 +36,11 @@ models: - name: DETR-ResNet101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 24796.0 - throughput: 40.32908533634457 + inference_time: 24522.0 + throughput: 40.779708017290595 estimated_peak_memory_range: - min: 430080 - max: 3276392 + min: 405504 + max: 3620824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 839 - job_id: j1p87j785 + job_id: jep23lxrg job_status: Passed torchscript_onnx_ort: - inference_time: 22705.0 - throughput: 44.04316229905307 + inference_time: 22510.0 + throughput: 44.4247001332741 estimated_peak_memory_range: - min: 2146304 - max: 309086736 + min: 53248 + max: 301197496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: j1pvw2wmg + job_id: jw56qn26g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.413162Z' + timestamp: '2024-06-08T22:21:16Z' - torchscript_onnx_tflite: - inference_time: 17296.0 - throughput: 57.816836262719704 + inference_time: 17593.0 + throughput: 56.840788950150625 
estimated_peak_memory_range: - min: 36864 - max: 283795968 + min: 385024 + max: 284374432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 839 - job_id: jogky6yop + job_id: jqpyv6z8p job_status: Passed torchscript_onnx_ort: - inference_time: 16129.0 - throughput: 62.000124000248 + inference_time: 15841.0 + throughput: 63.127327820213374 estimated_peak_memory_range: - min: 2801664 - max: 112669552 + min: 38055936 + max: 153822592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: j7gjl3l8p + job_id: j1p3qen35 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.413365Z' + timestamp: '2024-06-08T22:21:17Z' - torchscript_onnx_tflite: - inference_time: 24577.0 - throughput: 40.68844854945681 + inference_time: 24627.0 + throughput: 40.60583911966541 estimated_peak_memory_range: - min: 434176 - max: 3536864 + min: 413696 + max: 3309184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 839 - job_id: jn5q242m5 + job_id: j2p0el495 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.413467Z' + timestamp: '2024-06-08T22:21:11Z' - torchscript_onnx_ort: - inference_time: 22988.0 - throughput: 43.50095702105446 + inference_time: 22958.0 + throughput: 43.557801202195314 estimated_peak_memory_range: - min: 115417088 - max: 115417088 + min: 100909056 + max: 100909056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jlpev6v05 + job_id: jwgoe3zqp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.413572Z' + timestamp: '2024-06-08T22:21:18Z' diff --git a/qai_hub_models/models/detr_resnet101/requirements.txt b/qai_hub_models/models/detr_resnet101/requirements.txt index 546533cd..f43e4f4b 100644 --- a/qai_hub_models/models/detr_resnet101/requirements.txt +++ b/qai_hub_models/models/detr_resnet101/requirements.txt @@ -1,2 +1,2 @@ transformers==4.41.1 -timm==0.9.11 +timm==1.0.3 diff --git a/qai_hub_models/models/detr_resnet101_dc5/README.md b/qai_hub_models/models/detr_resnet101_dc5/README.md index 272c64ab..e8c9e777 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/README.md +++ b/qai_hub_models/models/detr_resnet101_dc5/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DETR-ResNet101-DC5 can be found [here](https://github.com/facebookresearch/detr/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) diff --git a/qai_hub_models/models/detr_resnet101_dc5/export.py b/qai_hub_models/models/detr_resnet101_dc5/export.py index cefe54f7..260f917d 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/export.py +++ b/qai_hub_models/models/detr_resnet101_dc5/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -215,7 +215,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet101_dc5/perf.yaml b/qai_hub_models/models/detr_resnet101_dc5/perf.yaml index 06ea9ea9..6760c8c6 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/perf.yaml +++ b/qai_hub_models/models/detr_resnet101_dc5/perf.yaml @@ -36,11 +36,11 @@ models: - name: DETR-ResNet101-DC5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 151967.0 - throughput: 6.580376002684793 + inference_time: 139662.0 + throughput: 7.160143775687016 estimated_peak_memory_range: - min: 1191936 - max: 4041416 + min: 1216512 + max: 4184536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 840 - job_id: jvgdv6vlg + job_id: j7gjkezv5 job_status: Passed torchscript_onnx_ort: - inference_time: 126534.0 - throughput: 7.903014209619549 + inference_time: 125062.0 + throughput: 7.996033967152292 estimated_peak_memory_range: - min: 2162688 - max: 314190936 + min: 2994176 + max: 315584184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jqpyd844p + job_id: jz57vx0v5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.439417Z' + timestamp: '2024-06-08T22:21:59Z' - torchscript_onnx_tflite: - inference_time: 107197.0 - throughput: 9.32861927106169 + inference_time: 106500.0 + throughput: 9.389671361502348 estimated_peak_memory_range: - min: 221184 - max: 493147472 + min: 991232 + max: 494886848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 840 - job_id: jz57dojr5 + job_id: jlpe4keo5 job_status: Passed torchscript_onnx_ort: - inference_time: 95203.0 - throughput: 10.503870676344233 + inference_time: 96040.0 + throughput: 10.412328196584756 estimated_peak_memory_range: - min: 4079616 - max: 168798432 + min: 4145152 + max: 167656240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: j2p0ro1ep + job_id: 
jqp4jvk8p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.439619Z' + timestamp: '2024-06-08T22:22:00Z' - torchscript_onnx_tflite: - inference_time: 141441.0 - throughput: 7.0700857601402705 + inference_time: 139388.0 + throughput: 7.174218727580566 estimated_peak_memory_range: - min: 16384 - max: 4988056 + min: 1548288 + max: 4377008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 840 - job_id: jqp4wexlg + job_id: jygzvroop job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.439721Z' + timestamp: '2024-06-08T22:21:54Z' - torchscript_onnx_ort: - inference_time: 125955.0 - throughput: 7.939343416299472 + inference_time: 124053.0 + throughput: 8.061070671406576 estimated_peak_memory_range: - min: 121176064 - max: 121176064 + min: 73572352 + max: 73572352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: j1p87j385 + job_id: j0pxeyn35 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.439826Z' + timestamp: '2024-06-08T22:22:01Z' diff --git a/qai_hub_models/models/detr_resnet101_dc5/requirements.txt b/qai_hub_models/models/detr_resnet101_dc5/requirements.txt index 546533cd..f43e4f4b 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/requirements.txt +++ b/qai_hub_models/models/detr_resnet101_dc5/requirements.txt @@ -1,2 +1,2 @@ transformers==4.41.1 -timm==0.9.11 +timm==1.0.3 diff --git a/qai_hub_models/models/detr_resnet50/README.md b/qai_hub_models/models/detr_resnet50/README.md index e37e8210..df378aea 100644 --- a/qai_hub_models/models/detr_resnet50/README.md +++ b/qai_hub_models/models/detr_resnet50/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DETR-ResNet50 can be found [here](https://github.com/facebookresearch/detr/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) diff --git a/qai_hub_models/models/detr_resnet50/export.py b/qai_hub_models/models/detr_resnet50/export.py index f7bd5816..aae80346 100644 --- a/qai_hub_models/models/detr_resnet50/export.py +++ b/qai_hub_models/models/detr_resnet50/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -215,7 +215,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet50/perf.yaml b/qai_hub_models/models/detr_resnet50/perf.yaml index 974d50cd..fff85076 100644 --- a/qai_hub_models/models/detr_resnet50/perf.yaml +++ b/qai_hub_models/models/detr_resnet50/perf.yaml @@ -36,11 +36,11 @@ models: - name: DETR-ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 20875.0 - throughput: 47.90419161676647 + inference_time: 21615.0 + throughput: 46.26416840157298 estimated_peak_memory_range: - min: 421888 - max: 3357136 + min: 2134016 + max: 5200288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 771 - job_id: jw561o37p + job_id: jegnr3lk5 job_status: Passed torchscript_onnx_ort: - inference_time: 16647.0 - throughput: 60.070883642698384 + inference_time: 16643.0 + throughput: 60.08532115604158 estimated_peak_memory_range: - min: 708608 - max: 211147208 + min: 1540096 + max: 211446576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: jz5w9ydjp + job_id: j1gle36jp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.465610Z' + timestamp: '2024-06-08T22:22:37Z' - torchscript_onnx_tflite: - inference_time: 14432.0 - throughput: 69.29046563192905 + inference_time: 15132.0 + throughput: 66.08511763150939 estimated_peak_memory_range: - min: 385024 - max: 232248816 + min: 36864 + max: 231347824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 771 - job_id: j1p3mo4zg + job_id: jep23l0rg job_status: Passed torchscript_onnx_ort: - inference_time: 11774.0 - throughput: 84.93290300662477 + inference_time: 11694.0 + throughput: 85.51393877201984 estimated_peak_memory_range: - min: 4878336 - max: 101878160 + min: 2809856 + max: 97007056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: jmg94o3v5 + job_id: jw56qne6g job_status: Passed reference_device_info: name: 
Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.465801Z' + timestamp: '2024-06-08T22:22:38Z' - torchscript_onnx_tflite: - inference_time: 20845.0 - throughput: 47.97313504437515 + inference_time: 21665.0 + throughput: 46.157396722824835 estimated_peak_memory_range: - min: 462848 - max: 4452912 + min: 438272 + max: 4576272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 771 - job_id: jwgovd1d5 + job_id: jqpyv6r8p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.465902Z' + timestamp: '2024-06-08T22:22:32Z' - torchscript_onnx_ort: - inference_time: 17028.0 - throughput: 58.72680291284942 + inference_time: 16944.0 + throughput: 59.01794145420208 estimated_peak_memory_range: - min: 40251392 - max: 40251392 + min: 116158464 + max: 116158464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: jnp18odlg + job_id: j1p3qev35 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.465999Z' + timestamp: '2024-06-08T22:22:39Z' diff --git a/qai_hub_models/models/detr_resnet50/requirements.txt b/qai_hub_models/models/detr_resnet50/requirements.txt index 546533cd..f43e4f4b 100644 --- a/qai_hub_models/models/detr_resnet50/requirements.txt +++ b/qai_hub_models/models/detr_resnet50/requirements.txt @@ -1,2 +1,2 @@ transformers==4.41.1 -timm==0.9.11 +timm==1.0.3 diff --git a/qai_hub_models/models/detr_resnet50_dc5/README.md b/qai_hub_models/models/detr_resnet50_dc5/README.md index 39efe2a1..0e3471c6 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/README.md +++ b/qai_hub_models/models/detr_resnet50_dc5/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DETR-ResNet50-DC5 can be found [here](https://github.com/facebookresearch/detr/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) diff --git a/qai_hub_models/models/detr_resnet50_dc5/export.py b/qai_hub_models/models/detr_resnet50_dc5/export.py index adfa7e7f..af83c17f 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/export.py +++ b/qai_hub_models/models/detr_resnet50_dc5/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -215,7 +215,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet50_dc5/perf.yaml b/qai_hub_models/models/detr_resnet50_dc5/perf.yaml index ee2eb6c6..b9e42be2 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/perf.yaml +++ b/qai_hub_models/models/detr_resnet50_dc5/perf.yaml @@ -36,11 +36,11 @@ models: - name: DETR-ResNet50-DC5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 134142.0 - throughput: 7.454786718551982 + inference_time: 133335.0 + throughput: 7.49990625117186 estimated_peak_memory_range: - min: 1200128 - max: 4600904 + min: 135168 + max: 3805824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 772 - job_id: jz57dovr5 + job_id: jz5wmq33g job_status: Passed torchscript_onnx_ort: - inference_time: 119136.0 - throughput: 8.393768466290625 + inference_time: 117630.0 + throughput: 8.501232678738416 estimated_peak_memory_range: - min: 0 - max: 233208792 + min: 2134016 + max: 232241232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: jqpyd8v4p + job_id: jo5mv36d5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.491723Z' + timestamp: '2024-06-08T22:23:14Z' - torchscript_onnx_tflite: - inference_time: 101510.0 - throughput: 9.851246182642104 + inference_time: 102075.0 + throughput: 9.796718099436688 estimated_peak_memory_range: - min: 1228800 - max: 446135248 + min: 163840 + max: 444293712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 772 - job_id: jqp4wejlg + job_id: jmg99wywg job_status: Passed torchscript_onnx_ort: - inference_time: 91238.0 - throughput: 10.960345470089218 + inference_time: 90172.0 + throughput: 11.089917047420485 estimated_peak_memory_range: - min: 4784128 - max: 148984528 + min: 6778880 + max: 152435808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: j2p0roeep + job_id: jegnr3mk5 job_status: 
Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.491907Z' + timestamp: '2024-06-08T22:23:15Z' - torchscript_onnx_tflite: - inference_time: 134198.0 - throughput: 7.451675881905841 + inference_time: 132335.0 + throughput: 7.556579891940908 estimated_peak_memory_range: min: 1204224 - max: 4475272 + max: 4586176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 772 - job_id: j0px10e9g + job_id: jnp1qew8g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.492030Z' + timestamp: '2024-06-08T22:23:09Z' - torchscript_onnx_ort: - inference_time: 118988.0 - throughput: 8.404208827780952 + inference_time: 116939.0 + throughput: 8.551467004164564 estimated_peak_memory_range: - min: 43630592 - max: 43630592 + min: 22482944 + max: 22482944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: j1p87jw85 + job_id: jopr1e20g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.492122Z' + timestamp: '2024-06-08T22:23:16Z' diff --git a/qai_hub_models/models/detr_resnet50_dc5/requirements.txt b/qai_hub_models/models/detr_resnet50_dc5/requirements.txt index 546533cd..f43e4f4b 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/requirements.txt +++ b/qai_hub_models/models/detr_resnet50_dc5/requirements.txt @@ -1,2 +1,2 @@ transformers==4.41.1 -timm==0.9.11 +timm==1.0.3 diff --git a/qai_hub_models/models/efficientnet_b0/README.md b/qai_hub_models/models/efficientnet_b0/README.md index 31379b53..56096dcb 100644 --- a/qai_hub_models/models/efficientnet_b0/README.md +++ b/qai_hub_models/models/efficientnet_b0/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of EfficientNet-B0 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) diff --git a/qai_hub_models/models/efficientnet_b0/evaluate.py b/qai_hub_models/models/efficientnet_b0/evaluate.py new file mode 100644 index 00000000..253f3004 --- /dev/null +++ b/qai_hub_models/models/efficientnet_b0/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.efficientnet_b0 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/efficientnet_b0/export.py b/qai_hub_models/models/efficientnet_b0/export.py index 7b0d5f3d..d4d7827c 100644 --- a/qai_hub_models/models/efficientnet_b0/export.py +++ b/qai_hub_models/models/efficientnet_b0/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/efficientnet_b0/info.yaml b/qai_hub_models/models/efficientnet_b0/info.yaml index 0e74436b..f02c5b91 100644 --- a/qai_hub_models/models/efficientnet_b0/info.yaml +++ b/qai_hub_models/models/efficientnet_b0/info.yaml @@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/efficientnet_b0/perf.yaml b/qai_hub_models/models/efficientnet_b0/perf.yaml index 65190420..6383102d 100644 --- a/qai_hub_models/models/efficientnet_b0/perf.yaml +++ b/qai_hub_models/models/efficientnet_b0/perf.yaml @@ -36,11 +36,11 @@ models: - name: EfficientNet-B0 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1618.0 - throughput: 618.0469715698393 + inference_time: 1626.0 + throughput: 615.0061500615006 estimated_peak_memory_range: min: 16384 - max: 2344216 + max: 1985056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jn5q249m5 + job_id: jqpyv6j8p job_status: Passed torchscript_onnx_qnn: - inference_time: 1684.0 - throughput: 593.8242280285035 + inference_time: 1678.0 + throughput: 595.9475566150179 estimated_peak_memory_range: - min: 622592 - max: 88873088 + min: 16384 + max: 315561544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: j1p3moqzg + job_id: jogkr3qw5 job_status: Passed torchscript_onnx_ort: - inference_time: 1598.0 - throughput: 625.7822277847309 + inference_time: 1623.0 + throughput: 616.1429451632779 estimated_peak_memory_range: - min: 12288 - max: 80031016 + min: 16384 + max: 80982248 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jygz7zv6p + job_id: j1p3qe135 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.517590Z' + timestamp: '2024-06-08T22:23:45Z' - torchscript_onnx_tflite: - inference_time: 1139.0 - throughput: 877.9631255487269 + inference_time: 1142.0 + throughput: 875.6567425569177 estimated_peak_memory_range: min: 16384 - max: 71725056 + max: 72610976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j1glkwelp + job_id: j2p0el295 job_status: Passed torchscript_onnx_qnn: - inference_time: 1177.0 - throughput: 849.6176720475786 + inference_time: 1186.0 + throughput: 843.1703204047218 estimated_peak_memory_range: min: 618496 - max: 72406400 + max: 72353488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jwgovded5 + job_id: jn5q93rnp job_status: Passed torchscript_onnx_ort: - inference_time: 1134.0 - throughput: 881.8342151675485 + inference_time: 1173.0 + throughput: 852.5149190110827 estimated_peak_memory_range: - min: 0 - max: 33758960 + min: 618496 + max: 36882944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jz5w9ymjp + job_id: jwgoe3nqp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.517700Z' + timestamp: '2024-06-08T22:23:46Z' - torchscript_onnx_tflite: - inference_time: 1630.0 - throughput: 613.4969325153374 + inference_time: 1631.0 + throughput: 613.1207847946046 estimated_peak_memory_range: min: 16384 - max: 2786328 + max: 2841808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jw561oq7p + job_id: j1p8wzmkp job_status: Passed torchscript_onnx_qnn: - inference_time: 1680.0 - throughput: 595.2380952380952 + inference_time: 1683.0 + throughput: 594.1770647653001 estimated_peak_memory_range: - min: 634880 - max: 88349960 + min: 622592 + max: 88821056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: j7gjl3k8p + job_id: jw56qnz6g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.517774Z' + timestamp: '2024-06-08T22:23:44Z' - torchscript_onnx_qnn: - inference_time: 1830.0 - throughput: 546.448087431694 + inference_time: 1838.0 + throughput: 544.069640914037 estimated_peak_memory_range: - min: 602112 - max: 602112 + min: 1310720 + max: 1310720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: j1pvw2zmg + job_id: j1gle32jp job_status: Passed torchscript_onnx_ort: inference_time: 1641.0 throughput: 609.3845216331505 estimated_peak_memory_range: - min: 41422848 - max: 41422848 + min: 32149504 + max: 32149504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 
+207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jmg94o9v5 + job_id: j1pvzvvkg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.517848Z' + timestamp: '2024-06-08T22:23:47Z' diff --git a/qai_hub_models/models/esrgan/README.md b/qai_hub_models/models/esrgan/README.md index 4afc7424..7b22d043 100644 --- a/qai_hub_models/models/esrgan/README.md +++ b/qai_hub_models/models/esrgan/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ESRGAN can be found [here](https://github.com/xinntao/ESRGAN/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [ESRGAN: Enhanced Super-Resolution Generative Adversarial Networks](https://arxiv.org/abs/1809.00219) diff --git a/qai_hub_models/models/esrgan/export.py b/qai_hub_models/models/esrgan/export.py index 8603628c..47c6b95d 100644 --- a/qai_hub_models/models/esrgan/export.py +++ b/qai_hub_models/models/esrgan/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/esrgan/perf.yaml b/qai_hub_models/models/esrgan/perf.yaml index f84124f1..e35a93e5 100644 --- a/qai_hub_models/models/esrgan/perf.yaml +++ b/qai_hub_models/models/esrgan/perf.yaml @@ -36,11 +36,11 @@ models: - name: ESRGAN performance_metrics: - torchscript_onnx_tflite: - inference_time: 67687.0 - throughput: 14.773885679672611 + inference_time: 66520.0 + throughput: 15.033072760072159 estimated_peak_memory_range: - min: 3313664 - max: 7995200 + min: 4288512 + max: 7346848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jz5w9yx6p + job_id: jlpe4kko5 job_status: Passed torchscript_onnx_qnn: - inference_time: 66775.0 - throughput: 14.97566454511419 + inference_time: 67593.0 + throughput: 14.794431376030062 estimated_peak_memory_range: - min: 237568 - max: 105521768 + min: 73728 + max: 104762776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jvgdv60eg + job_id: jmg99wwwg job_status: Passed torchscript_onnx_ort: - inference_time: 70447.0 - throughput: 14.195068633156842 + inference_time: 68322.0 + throughput: 14.636573870788325 estimated_peak_memory_range: - min: 4218880 - max: 159787592 + min: 6356992 + max: 154422496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jo5mz94wp + job_id: jmg99ww8g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.552188Z' + timestamp: 
'2024-06-08T22:24:28Z' - torchscript_onnx_tflite: - inference_time: 53811.0 - throughput: 18.58356098195536 + inference_time: 56935.0 + throughput: 17.56388864494599 estimated_peak_memory_range: - min: 3256320 - max: 587536480 + min: 86016 + max: 583340176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jmg94o8l5 + job_id: jygzvrrop job_status: Passed torchscript_onnx_qnn: - inference_time: 50812.0 - throughput: 19.680390458946704 + inference_time: 50707.0 + throughput: 19.72114303745045 estimated_peak_memory_range: - min: 77824 - max: 256960720 + min: 73728 + max: 260404000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jz57do6l5 + job_id: jnp1qee8g job_status: Passed torchscript_onnx_ort: - inference_time: 53028.0 - throughput: 18.85796183148525 + inference_time: 51557.0 + throughput: 19.396008301491552 estimated_peak_memory_range: - min: 6365184 - max: 191210256 + min: 5955584 + max: 196150816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jegne1xrg + job_id: jnp1qee7g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.552520Z' + timestamp: '2024-06-08T22:24:29Z' - torchscript_onnx_tflite: - inference_time: 75584.0 - throughput: 13.230313293818798 + inference_time: 65283.0 + throughput: 15.31792350229003 estimated_peak_memory_range: - min: 28672 - max: 5061616 + min: 1536000 + max: 4290816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jnp18o32g + job_id: jz5wmqq3g job_status: Passed torchscript_onnx_qnn: - inference_time: 64917.0 - throughput: 15.40428547221837 + inference_time: 65436.0 + throughput: 15.282107708295127 estimated_peak_memory_range: - min: 110592 - max: 58149984 + min: 2744320 + max: 60284768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: j0px10m1g + job_id: jz5wmqqmg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.552756Z' + timestamp: '2024-06-08T22:24:27Z' - torchscript_onnx_qnn: - inference_time: 73244.0 - throughput: 13.652995467205505 + inference_time: 73135.0 + throughput: 13.67334381623026 estimated_peak_memory_range: - min: 204800 - max: 204800 + min: 221184 + max: 221184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jqp4we8vg + job_id: jvgd7oorg job_status: Passed torchscript_onnx_ort: - inference_time: 65794.0 - throughput: 15.198954311943337 + inference_time: 65785.0 + throughput: 15.20103367028958 estimated_peak_memory_range: - min: 1208320 - max: 1208320 + min: 208896 + max: 208896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jopryx99g + job_id: jvgd7oozg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm 
chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.552990Z' + timestamp: '2024-06-08T22:24:30Z' diff --git a/qai_hub_models/models/facebook_denoiser/README.md b/qai_hub_models/models/facebook_denoiser/README.md index eaa05e87..d4302a62 100644 --- a/qai_hub_models/models/facebook_denoiser/README.md +++ b/qai_hub_models/models/facebook_denoiser/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Facebook-Denoiser can be found [here](https://github.com/facebookresearch/denoiser/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Real Time Speech Enhancement in the Waveform Domain](https://arxiv.org/abs/2006.12847) diff --git a/qai_hub_models/models/facebook_denoiser/export.py b/qai_hub_models/models/facebook_denoiser/export.py index b03be595..11ecd12d 100644 --- a/qai_hub_models/models/facebook_denoiser/export.py +++ b/qai_hub_models/models/facebook_denoiser/export.py @@ -172,7 +172,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -201,7 +201,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/facebook_denoiser/perf.yaml b/qai_hub_models/models/facebook_denoiser/perf.yaml index 3fbd9376..912fe7c1 100644 --- a/qai_hub_models/models/facebook_denoiser/perf.yaml +++ b/qai_hub_models/models/facebook_denoiser/perf.yaml @@ -36,26 +36,26 @@ models: - name: Facebook-Denoiser performance_metrics: - torchscript_onnx_tflite: - inference_time: 736952.0 - throughput: 1.356940479162822 + inference_time: 762754.0 + throughput: 1.3110386835073955 estimated_peak_memory_range: - min: 92352512 - max: 464837840 + min: 271872000 + max: 745165216 primary_compute_unit: CPU precision: fp32 layer_info: layers_on_npu: 0 layers_on_gpu: 0 - layers_on_cpu: 209 - total_layers: 209 - job_id: jqpyd8n7p + layers_on_cpu: 205 + total_layers: 205 + job_id: jqp4jvv1p job_status: Passed torchscript_onnx_ort: - inference_time: 14540395.0 - throughput: 0.06877392257913213 + inference_time: 14425872.0 + throughput: 0.06931989969133236 estimated_peak_memory_range: - min: 847872 - max: 89968496 + min: 73728 + max: 97772968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 178 - job_id: jogky6d2p + job_id: jegnr33q5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,28 +72,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.586858Z' + timestamp: '2024-06-08T22:24:56Z' - torchscript_onnx_tflite: - inference_time: 777305.0 - throughput: 1.2864962916744394 + inference_time: 700116.0 + throughput: 1.4283347331013718 
estimated_peak_memory_range: - min: 366411776 - max: 386789680 + min: 418246656 + max: 442262688 primary_compute_unit: CPU precision: fp32 layer_info: layers_on_npu: 0 layers_on_gpu: 0 - layers_on_cpu: 209 - total_layers: 209 - job_id: j2p0rok6p + layers_on_cpu: 205 + total_layers: 205 + job_id: j0pxeyyl5 job_status: Passed torchscript_onnx_ort: - inference_time: 10689541.0 - throughput: 0.09354938626457394 + inference_time: 10632015.0 + throughput: 0.09405554826625057 estimated_peak_memory_range: - min: 19763200 - max: 227250656 + min: 16744448 + max: 226752096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 178 - job_id: jn5q24w45 + job_id: jopr1ee7g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,21 +110,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.586927Z' + timestamp: '2024-06-08T22:24:57Z' - torchscript_onnx_tflite: - inference_time: 756067.0 - throughput: 1.3226341051785093 + inference_time: 733772.0 + throughput: 1.3628211488037156 estimated_peak_memory_range: - min: 456364032 - max: 459426960 + min: 89939968 + max: 463947896 primary_compute_unit: CPU precision: fp32 layer_info: layers_on_npu: 0 layers_on_gpu: 0 - layers_on_cpu: 209 - total_layers: 209 - job_id: j1p87j8x5 + layers_on_cpu: 205 + total_layers: 205 + job_id: jo5mv3395 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,10 +133,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.586964Z' + timestamp: '2024-06-08T22:24:54Z' - torchscript_onnx_ort: - inference_time: 16116345.0 - throughput: 0.06204880821302845 + inference_time: 15555145.0 + throughput: 0.06428741101416927 estimated_peak_memory_range: min: 450560 max: 450560 @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 178 - job_id: j1glkw78p + job_id: jep23llqg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.586998Z' + timestamp: '2024-06-08T22:24:58Z' diff --git a/qai_hub_models/models/fastsam_s/README.md b/qai_hub_models/models/fastsam_s/README.md index 717a36e8..516dc401 100644 --- a/qai_hub_models/models/fastsam_s/README.md +++ b/qai_hub_models/models/fastsam_s/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FastSam-S can be found [here](https://github.com/CASIA-IVA-Lab/FastSAM/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/CASIA-IVA-Lab/FastSAM/blob/main/LICENSE) ## References * [Fast Segment Anything](https://arxiv.org/abs/2306.12156) diff --git a/qai_hub_models/models/fastsam_s/export.py b/qai_hub_models/models/fastsam_s/export.py index 2b188980..aad089b0 100644 --- a/qai_hub_models/models/fastsam_s/export.py +++ b/qai_hub_models/models/fastsam_s/export.py @@ -190,7 +190,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -227,7 +227,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/fastsam_s/perf.yaml b/qai_hub_models/models/fastsam_s/perf.yaml index 8fc1e10c..708e66d8 100644 --- a/qai_hub_models/models/fastsam_s/perf.yaml +++ b/qai_hub_models/models/fastsam_s/perf.yaml @@ -36,11 +36,11 @@ models: - name: FastSam-S performance_metrics: - torchscript_onnx_tflite: - inference_time: 8641.0 - throughput: 115.72734637194769 + inference_time: 8700.0 + throughput: 114.94252873563218 estimated_peak_memory_range: - min: 8404992 - max: 11000944 + min: 8429568 + max: 39456112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 288 - job_id: j1p3mo8lg + job_id: j2p0elln5 job_status: Passed torchscript_onnx_ort: - inference_time: 10946.0 - throughput: 91.35757354284671 + inference_time: 10893.0 + throughput: 91.80207472688883 estimated_peak_memory_range: - min: 19734528 - max: 75024696 + min: 26902528 + max: 83130600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jmg94okl5 + job_id: jwgoe3vkp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.613022Z' + timestamp: '2024-06-08T22:25:27Z' - torchscript_onnx_tflite: - inference_time: 6423.0 - throughput: 155.6904873112253 + inference_time: 6426.0 + throughput: 155.6178026766262 estimated_peak_memory_range: - min: 6889472 - max: 77082752 + min: 6594560 + max: 79404896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 288 - job_id: jwgovdmx5 + job_id: j1p8wzzop job_status: Passed torchscript_onnx_ort: - inference_time: 8057.0 - throughput: 124.11567580985478 + inference_time: 7507.0 + throughput: 133.20900492873318 estimated_peak_memory_range: - min: 24772608 - max: 65740448 + min: 27897856 + max: 69661040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jnp18o72g + job_id: j1pvzvwrg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 
Gen 3 - timestamp: '2024-05-29T18:59:40.613103Z' + timestamp: '2024-06-08T22:25:28Z' - torchscript_onnx_tflite: - inference_time: 8777.0 - throughput: 113.93414606357526 + inference_time: 8693.0 + throughput: 115.03508570113885 estimated_peak_memory_range: - min: 7831552 - max: 10988912 + min: 3923968 + max: 21721296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 288 - job_id: j1pvw24jg + job_id: jogkr33n5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.613145Z' + timestamp: '2024-06-08T22:25:21Z' - torchscript_onnx_ort: - inference_time: 10792.0 - throughput: 92.66123054114159 + inference_time: 10798.0 + throughput: 92.60974254491572 estimated_peak_memory_range: - min: 55435264 - max: 55435264 + min: 72966144 + max: 72966144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jvgdv68eg + job_id: j7gjkele5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.613189Z' + timestamp: '2024-06-08T22:25:29Z' diff --git a/qai_hub_models/models/fastsam_x/README.md b/qai_hub_models/models/fastsam_x/README.md index b3c84891..0c34311d 100644 --- a/qai_hub_models/models/fastsam_x/README.md +++ b/qai_hub_models/models/fastsam_x/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FastSam-X can be found [here](https://github.com/CASIA-IVA-Lab/FastSAM/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/CASIA-IVA-Lab/FastSAM/blob/main/LICENSE) ## References * [Fast Segment Anything](https://arxiv.org/abs/2306.12156) diff --git a/qai_hub_models/models/fastsam_x/export.py b/qai_hub_models/models/fastsam_x/export.py index 32977477..fb2a5872 100644 --- a/qai_hub_models/models/fastsam_x/export.py +++ b/qai_hub_models/models/fastsam_x/export.py @@ -190,7 +190,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -227,7 +227,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/fastsam_x/perf.yaml b/qai_hub_models/models/fastsam_x/perf.yaml index eb05cebb..51138c0a 100644 --- a/qai_hub_models/models/fastsam_x/perf.yaml +++ b/qai_hub_models/models/fastsam_x/perf.yaml @@ -36,11 +36,11 @@ models: - name: FastSam-X performance_metrics: - torchscript_onnx_tflite: - inference_time: 50032.0 - throughput: 19.987208186760473 + inference_time: 53656.0 + throughput: 18.637244669748025 estimated_peak_memory_range: min: 9220096 - max: 14175736 + max: 14211840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 420 - job_id: jqp4wemvg + job_id: jygzvr7xp job_status: Passed torchscript_onnx_ort: - inference_time: 50303.0 - throughput: 19.879530047909668 + inference_time: 51625.0 + throughput: 19.37046004842615 estimated_peak_memory_range: - min: 26087424 - max: 347775896 + min: 25325568 + max: 343683192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 421 - job_id: j2p0rod6p + job_id: j0pxey1l5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.639222Z' + timestamp: '2024-06-08T22:26:04Z' - torchscript_onnx_tflite: - inference_time: 36166.0 - throughput: 27.650279267820604 + inference_time: 36229.0 + throughput: 27.602197134891938 estimated_peak_memory_range: - min: 7733248 - max: 142126784 + min: 8450048 + max: 144127216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 420 - job_id: j0px1031g + job_id: jz5wmq9mg job_status: Passed torchscript_onnx_ort: - inference_time: 36822.0 - throughput: 27.1576774754223 + inference_time: 37119.0 + throughput: 26.94038093698645 estimated_peak_memory_range: - min: 28086272 - max: 92380224 + min: 29941760 + max: 95002704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 421 - job_id: j1p87j6x5 + job_id: jo5mv3z95 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - 
timestamp: '2024-05-29T18:59:40.639336Z' + timestamp: '2024-06-08T22:26:05Z' - torchscript_onnx_tflite: - inference_time: 52994.0 - throughput: 18.870060761595653 + inference_time: 49800.0 + throughput: 20.080321285140563 estimated_peak_memory_range: - min: 3325952 - max: 7764344 + min: 9379840 + max: 47006488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 420 - job_id: jo5mz9owp + job_id: jmg99w48g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.639421Z' + timestamp: '2024-06-08T22:25:58Z' - torchscript_onnx_ort: - inference_time: 49386.0 - throughput: 20.248653464544606 + inference_time: 49559.0 + throughput: 20.177969692689523 estimated_peak_memory_range: - min: 24240128 - max: 24240128 + min: 30785536 + max: 30785536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 421 - job_id: jogky6o2p + job_id: jegnr3eq5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.639483Z' + timestamp: '2024-06-08T22:26:06Z' diff --git a/qai_hub_models/models/fcn_resnet50/README.md b/qai_hub_models/models/fcn_resnet50/README.md index c2af6df1..dba323b0 100644 --- a/qai_hub_models/models/fcn_resnet50/README.md +++ b/qai_hub_models/models/fcn_resnet50/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FCN-ResNet50 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Fully Convolutional Networks for Semantic Segmentation](https://arxiv.org/abs/1411.4038) diff --git a/qai_hub_models/models/fcn_resnet50/export.py b/qai_hub_models/models/fcn_resnet50/export.py index 4c3eb67f..cc121898 100644 --- a/qai_hub_models/models/fcn_resnet50/export.py +++ b/qai_hub_models/models/fcn_resnet50/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/fcn_resnet50/info.yaml b/qai_hub_models/models/fcn_resnet50/info.yaml index fcaeaafa..eb1d3a92 100644 --- a/qai_hub_models/models/fcn_resnet50/info.yaml +++ b/qai_hub_models/models/fcn_resnet50/info.yaml @@ -37,3 +37,4 @@ license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: - coco +labels_file: voc_labels.txt diff --git a/qai_hub_models/models/fcn_resnet50/perf.yaml b/qai_hub_models/models/fcn_resnet50/perf.yaml index d6b15006..8d3c9539 100644 --- a/qai_hub_models/models/fcn_resnet50/perf.yaml +++ b/qai_hub_models/models/fcn_resnet50/perf.yaml @@ -36,11 +36,11 @@ models: - name: FCN-ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 42516.0 - throughput: 23.520556966788973 + inference_time: 41432.0 + throughput: 24.135933577910794 estimated_peak_memory_range: - min: 22130688 - max: 24822256 + min: 22097920 + max: 25129176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: j1glkwo8p + job_id: jep23lmqg job_status: Passed torchscript_onnx_qnn: - inference_time: 42169.0 - throughput: 23.714102776921436 + inference_time: 42249.0 + throughput: 23.669199270988663 estimated_peak_memory_range: - min: 3162112 - max: 21014016 + min: 3497984 + max: 21232048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jwgovdox5 + job_id: j1p8wz7op job_status: Passed torchscript_onnx_ort: - inference_time: 43060.0 - throughput: 23.22340919647004 + inference_time: 43347.0 + throughput: 23.069647265093316 estimated_peak_memory_range: - min: 44617728 - max: 204107384 + min: 44056576 + max: 204120472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jygz7z8kp + job_id: jw56qn3yg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.665232Z' + timestamp: '2024-06-08T22:26:38Z' - torchscript_onnx_tflite: - inference_time: 30738.0 - throughput: 32.533021016331574 + inference_time: 31357.0 + throughput: 31.890805880664605 estimated_peak_memory_range: - min: 21647360 - max: 155527440 + min: 49152 + max: 137281408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - 
job_id: jw561or0p + job_id: jqpyv6dlp job_status: Passed torchscript_onnx_qnn: - inference_time: 31677.0 - throughput: 31.56864602077217 + inference_time: 31599.0 + throughput: 31.64657109402196 estimated_peak_memory_range: - min: 2584576 - max: 80802912 + min: 3162112 + max: 80794592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j1pvw2ejg + job_id: jogkr3yn5 job_status: Passed torchscript_onnx_ort: - inference_time: 32035.0 - throughput: 31.21585765568909 + inference_time: 32324.0 + throughput: 30.936765251825268 estimated_peak_memory_range: - min: 47235072 - max: 116713856 + min: 43311104 + max: 107423312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jz5w9y16p + job_id: j1p3qe4n5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.665299Z' + timestamp: '2024-06-08T22:26:39Z' - torchscript_onnx_tflite: - inference_time: 42133.0 - throughput: 23.734364987064772 + inference_time: 41734.0 + throughput: 23.9612785738247 estimated_peak_memory_range: - min: 12288 - max: 30781808 + min: 22106112 + max: 24857096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: j1p3moxlg + job_id: j2p0elrn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 42154.0 - throughput: 23.72254115860891 + inference_time: 42169.0 + throughput: 23.714102776921436 estimated_peak_memory_range: - min: 3178496 - max: 21842136 + min: 3166208 + max: 19865232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jlpev6815 + job_id: j1gle30mp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.665342Z' + timestamp: '2024-06-08T22:26:37Z' - torchscript_onnx_qnn: - inference_time: 68527.0 - throughput: 14.59278824404979 + inference_time: 70340.0 + throughput: 14.216661927779358 estimated_peak_memory_range: min: 3153920 max: 3153920 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j7gjl3oxp + job_id: jn5q932op job_status: Passed torchscript_onnx_ort: - inference_time: 42361.0 - throughput: 23.606619296050614 + inference_time: 42281.0 + throughput: 23.651285447364064 estimated_peak_memory_range: - min: 15384576 - max: 15384576 + min: 9379840 + max: 9379840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jmg94oxl5 + job_id: jwgoe31kp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.665389Z' + timestamp: '2024-06-08T22:26:40Z' diff --git a/qai_hub_models/models/fcn_resnet50_quantized/README.md b/qai_hub_models/models/fcn_resnet50_quantized/README.md index e74a2898..3ed8a452 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/README.md +++ b/qai_hub_models/models/fcn_resnet50_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. 
## License - The license for the original implementation of FCN-ResNet50-Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Fully Convolutional Networks for Semantic Segmentation](https://arxiv.org/abs/1411.4038) diff --git a/qai_hub_models/models/fcn_resnet50_quantized/export.py b/qai_hub_models/models/fcn_resnet50_quantized/export.py index 943f9fe5..646bdaab 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/export.py +++ b/qai_hub_models/models/fcn_resnet50_quantized/export.py @@ -195,7 +195,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/fcn_resnet50_quantized/info.yaml b/qai_hub_models/models/fcn_resnet50_quantized/info.yaml index 21939860..19b875e5 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/info.yaml +++ b/qai_hub_models/models/fcn_resnet50_quantized/info.yaml @@ -39,3 +39,4 @@ license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: - coco +labels_file: voc_labels.txt diff --git a/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml b/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml index acff4279..83867788 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml +++ b/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: FCN-ResNet50-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 14122.0 - throughput: 70.8114997875655 + inference_time: 14137.0 + throughput: 70.73636556553724 estimated_peak_memory_range: - min: 6492160 - max: 8334248 + min: 7475200 + max: 59586696 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 87 - job_id: jqp4we9vg + job_id: j7gjke0e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 15195.0 - throughput: 65.81112207963146 + inference_time: 15266.0 + throughput: 65.5050438883794 estimated_peak_memory_range: - min: 16384 - max: 135971112 + min: 839680 + max: 9922576 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jep2mov45 + job_id: jnp1qed7g job_status: Passed torchscript_onnx_ort: - inference_time: 18653.0 - throughput: 53.61067924730607 + inference_time: 12789.0 + throughput: 78.19219641879741 estimated_peak_memory_range: - min: 44085248 - max: 93745064 + min: 9297920 + max: 58295544 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 82 + layers_on_npu: 80 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 82 - job_id: jn5q24m45 + total_layers: 80 + job_id: j0pxey7l5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.699656Z' + timestamp: '2024-06-08T22:28:08Z' - torchscript_onnx_tflite: - inference_time: 10017.0 - throughput: 
99.83028850953379 + inference_time: 10012.0 + throughput: 99.88014382740711 estimated_peak_memory_range: - min: 45056 - max: 82919904 + min: 73728 + max: 83075216 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 87 - job_id: j0px10d1g + job_id: jlpe4krv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 11194.0 - throughput: 89.33357155619082 + inference_time: 11234.0 + throughput: 89.01548869503294 estimated_peak_memory_range: - min: 1011712 - max: 55512672 + min: 802816 + max: 55488784 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jqpyd877p + job_id: jvgd7orzg job_status: Passed torchscript_onnx_ort: - inference_time: 14507.0 - throughput: 68.93223960846488 + inference_time: 9614.0 + throughput: 104.01497815685458 estimated_peak_memory_range: - min: 51236864 - max: 96653104 + min: 11309056 + max: 56165696 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 82 + layers_on_npu: 80 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 82 - job_id: j1glkw18p + total_layers: 80 + job_id: jegnr39q5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.699713Z' + timestamp: '2024-06-08T22:28:09Z' - torchscript_onnx_tflite: - inference_time: 14106.0 - throughput: 70.8918190840777 + inference_time: 14165.0 + throughput: 70.5965407695023 estimated_peak_memory_range: - min: 5558272 - max: 7622896 + min: 5574656 + max: 14323152 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 87 - job_id: jo5mz9dwp + job_id: jygzvrxxp job_status: Passed torchscript_onnx_qnn: - inference_time: 15191.0 - throughput: 65.82845105654664 + inference_time: 15225.0 + throughput: 65.68144499178982 estimated_peak_memory_range: - min: 36864 - max: 15082688 + min: 811008 + max: 30220216 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j1p87j4x5 + job_id: jqp4jvx1p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.699752Z' + timestamp: '2024-06-08T22:28:07Z' - torchscript_onnx_tflite: - inference_time: 89233.0 - throughput: 11.20661638631448 + inference_time: 89203.0 + throughput: 11.210385300942793 estimated_peak_memory_range: - min: 5718016 - max: 90674400 + min: 6000640 + max: 92646944 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 87 - job_id: jegne17rg - job_status: Passed - torchscript_onnx_qnn: - inference_time: 90925.0 - throughput: 10.998075336816058 - estimated_peak_memory_range: - min: 929792 - max: 84664912 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 79 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 79 - job_id: jogky692p + job_id: jz5wmqdmg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:40.699790Z' + timestamp: '2024-06-08T22:28:02Z' - torchscript_onnx_tflite: - inference_time: 752252.0 - throughput: 
1.3293417631325672 + inference_time: 728106.0 + throughput: 1.373426396705974 estimated_peak_memory_range: - min: 65630208 - max: 185474512 + min: 33034240 + max: 70768096 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 9 layers_on_cpu: 12 total_layers: 87 - job_id: jopryxn9g + job_id: jmg99w38g job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:40.699812Z' + timestamp: '2024-06-08T22:28:03Z' - torchscript_onnx_qnn: - inference_time: 16847.0 - throughput: 59.357749154152074 + inference_time: 16789.0 + throughput: 59.562808982071594 estimated_peak_memory_range: - min: 786432 - max: 786432 + min: 794624 + max: 794624 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j2p0rov6p + job_id: jz57vxj95 job_status: Passed torchscript_onnx_ort: - inference_time: 17508.0 - throughput: 57.11674663011195 + inference_time: 12535.0 + throughput: 79.77662544874352 estimated_peak_memory_range: - min: 69443584 - max: 69443584 + min: 835584 + max: 835584 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 82 + layers_on_npu: 80 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 82 - job_id: jw561od0p + total_layers: 80 + job_id: jopr1e47g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.699852Z' + timestamp: '2024-06-08T22:28:10Z' diff --git a/qai_hub_models/models/ffnet_122ns_lowres/README.md b/qai_hub_models/models/ffnet_122ns_lowres/README.md index 6d34b21a..f6d57fa7 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/README.md +++ b/qai_hub_models/models/ffnet_122ns_lowres/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-122NS-LowRes can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_122ns_lowres/export.py b/qai_hub_models/models/ffnet_122ns_lowres/export.py index 53651a1f..98404700 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/export.py +++ b/qai_hub_models/models/ffnet_122ns_lowres/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/ffnet_122ns_lowres/info.yaml b/qai_hub_models/models/ffnet_122ns_lowres/info.yaml index a40f6ccd..8ae3d60b 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/info.yaml +++ b/qai_hub_models/models/ffnet_122ns_lowres/info.yaml @@ -36,3 +36,4 @@ license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: - cityscapes +labels_file: cityscapes_labels.txt diff --git a/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml b/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml index 4c3cbe64..996c082b 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml +++ b/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml @@ -36,11 +36,11 @@ models: - name: FFNet-122NS-LowRes performance_metrics: - torchscript_onnx_tflite: - inference_time: 10505.0 - throughput: 95.19276534983341 + inference_time: 9538.0 + throughput: 104.84378276368211 estimated_peak_memory_range: - min: 647168 - max: 3107656 + min: 0 + max: 1882960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: j1pvw29jg + job_id: j1p8wz3op job_status: Passed torchscript_onnx_qnn: - inference_time: 10881.0 - throughput: 91.90331770976933 + inference_time: 10684.0 + throughput: 93.59790340696368 estimated_peak_memory_range: - min: 6311936 - max: 30292720 + min: 7036928 + max: 23266984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jygz7z4kp + job_id: j1gle3emp job_status: Passed torchscript_onnx_ort: - inference_time: 7968.0 - throughput: 125.50200803212851 + inference_time: 7933.0 + throughput: 126.05571662674902 estimated_peak_memory_range: - min: 1523712 - max: 135656304 + min: 1155072 + max: 141586240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 350 - job_id: jvgdv62eg + job_id: j1pvzvzrg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.743297Z' + timestamp: '2024-06-08T22:28:42Z' - torchscript_onnx_tflite: - inference_time: 7344.0 - throughput: 136.16557734204792 + inference_time: 6833.0 + throughput: 146.34860237084735 estimated_peak_memory_range: - min: 667648 - max: 60722624 + min: 659456 + max: 61929920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 
+101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: j7gjl3wxp + job_id: jogkr3ln5 job_status: Passed torchscript_onnx_qnn: - inference_time: 7564.0 - throughput: 132.20518244315178 + inference_time: 7606.0 + throughput: 131.47515119642387 estimated_peak_memory_range: min: 6307840 - max: 88725056 + max: 93102864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jz5w9y46p + job_id: jw56qnqyg job_status: Passed torchscript_onnx_ort: - inference_time: 5884.0 - throughput: 169.9524133242692 + inference_time: 5594.0 + throughput: 178.7629603146228 estimated_peak_memory_range: - min: 8749056 - max: 63861488 + min: 6307840 + max: 59711872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 350 - job_id: jz57do9l5 + job_id: j7gjkeke5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.743426Z' + timestamp: '2024-06-08T22:28:43Z' - torchscript_onnx_tflite: - inference_time: 10684.0 - throughput: 93.59790340696368 + inference_time: 9545.0 + throughput: 104.76689366160294 estimated_peak_memory_range: - min: 684032 - max: 10103760 + min: 0 + max: 2096664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jlpev6l15 + job_id: jn5q937op job_status: Passed torchscript_onnx_qnn: - inference_time: 10891.0 - throughput: 91.81893306399779 + inference_time: 10716.0 + throughput: 93.3184023889511 estimated_peak_memory_range: - min: 6307840 - max: 38441144 + min: 6311936 + max: 40648480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jnp18o62g + job_id: jwgoe3ekp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.743508Z' + timestamp: '2024-06-08T22:28:41Z' - torchscript_onnx_qnn: - inference_time: 17476.0 - throughput: 57.221332112611584 + inference_time: 17375.0 + throughput: 57.55395683453237 estimated_peak_memory_range: min: 6303744 max: 6303744 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jmg94odl5 + job_id: j1p3qeqn5 job_status: Passed torchscript_onnx_ort: - inference_time: 7566.0 - throughput: 132.17023526301878 + inference_time: 7523.0 + throughput: 132.92569453675395 estimated_peak_memory_range: - min: 9342976 - max: 9342976 + min: 6332416 + max: 6332416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 350 - job_id: j0px10x1g + job_id: jlpe4k4v5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.743602Z' + timestamp: '2024-06-08T22:28:44Z' diff --git a/qai_hub_models/models/ffnet_40s/README.md b/qai_hub_models/models/ffnet_40s/README.md index f1911ec4..0bc90d39 100644 --- a/qai_hub_models/models/ffnet_40s/README.md +++ b/qai_hub_models/models/ffnet_40s/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. 
## License - The license for the original implementation of FFNet-40S can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_40s/export.py b/qai_hub_models/models/ffnet_40s/export.py index 31513e00..879b2dbd 100644 --- a/qai_hub_models/models/ffnet_40s/export.py +++ b/qai_hub_models/models/ffnet_40s/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/ffnet_40s/info.yaml b/qai_hub_models/models/ffnet_40s/info.yaml index 967b8b32..01f9323a 100644 --- a/qai_hub_models/models/ffnet_40s/info.yaml +++ b/qai_hub_models/models/ffnet_40s/info.yaml @@ -37,3 +37,4 @@ license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: - cityscapes +labels_file: cityscapes_labels.txt diff --git a/qai_hub_models/models/ffnet_40s/perf.yaml b/qai_hub_models/models/ffnet_40s/perf.yaml index 5e012497..2da118a9 100644 --- a/qai_hub_models/models/ffnet_40s/perf.yaml +++ b/qai_hub_models/models/ffnet_40s/perf.yaml @@ -36,11 +36,11 @@ models: - name: FFNet-40S performance_metrics: - torchscript_onnx_tflite: - inference_time: 23135.0 - throughput: 43.22455154527772 + inference_time: 23193.0 + throughput: 43.11645755184754 estimated_peak_memory_range: - min: 2457600 - max: 4282696 + min: 2531328 + max: 4441664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jopryxw9g + job_id: jz5wmqmmg job_status: Passed torchscript_onnx_qnn: - inference_time: 17200.0 - throughput: 58.13953488372093 + inference_time: 17411.0 + throughput: 57.43495491356039 estimated_peak_memory_range: - min: 23400448 - max: 42153440 + min: 25214976 + max: 45407080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: j2p0ro66p + job_id: jvgd7o7zg job_status: Passed torchscript_onnx_ort: - inference_time: 27788.0 - throughput: 35.98675687347056 + inference_time: 27393.0 + throughput: 36.50567663271639 estimated_peak_memory_range: - min: 31006720 - max: 110905224 + min: 34656256 + max: 113886552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: j1glkwl8p + job_id: jo5mv3v95 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.778232Z' + timestamp: '2024-06-08T22:29:14Z' - torchscript_onnx_tflite: - inference_time: 16624.0 - throughput: 60.15399422521656 + inference_time: 16820.0 + throughput: 59.45303210463734 estimated_peak_memory_range: - min: 40960 - max: 97875920 + min: 757760 + max: 102036720 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jep2moe45 + job_id: jmg99w98g job_status: Passed torchscript_onnx_qnn: - inference_time: 12551.0 - throughput: 79.67492630069317 + inference_time: 12560.0 + throughput: 79.61783439490446 estimated_peak_memory_range: - min: 25219072 - max: 85826080 + min: 132333568 + max: 190814608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: j1p87j1x5 + job_id: jz57vxv95 job_status: Passed torchscript_onnx_ort: - inference_time: 19852.0 - throughput: 50.372758412250654 + inference_time: 19832.0 + throughput: 50.42355788624445 estimated_peak_memory_range: - min: 32559104 - max: 73485856 + min: 29405184 + max: 74127520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jw561ow0p + job_id: jegnr3rq5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.778307Z' + timestamp: '2024-06-08T22:29:15Z' - torchscript_onnx_tflite: - inference_time: 22986.0 - throughput: 43.50474201687984 + inference_time: 23566.0 + throughput: 42.43401510650938 estimated_peak_memory_range: - min: 2535424 - max: 33407872 + min: 2564096 + max: 4836528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jqpyd8m7p + job_id: jnp1qeq7g job_status: Passed torchscript_onnx_qnn: - inference_time: 17314.0 - throughput: 57.75672865888876 + inference_time: 17310.0 + throughput: 57.77007510109763 estimated_peak_memory_range: - min: 24948736 - max: 47341064 + min: 25202688 + max: 45281048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jn5q24v45 + job_id: j0pxeyel5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.778354Z' + timestamp: '2024-06-08T22:29:13Z' - torchscript_onnx_qnn: - inference_time: 23238.0 - throughput: 43.03296324984939 + inference_time: 23356.0 + throughput: 42.81555060798082 estimated_peak_memory_range: min: 25219072 max: 25219072 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jogky682p + job_id: jqp4jvj1p job_status: Passed torchscript_onnx_ort: - inference_time: 26282.0 - throughput: 38.048854729472644 + inference_time: 26356.0 + throughput: 37.942024586431934 estimated_peak_memory_range: - min: 25227264 - max: 25227264 + min: 25219072 + max: 25219072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: j1p3mo6lg + job_id: jopr1e17g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.778407Z' + timestamp: '2024-06-08T22:29:16Z' diff --git a/qai_hub_models/models/ffnet_40s_quantized/README.md b/qai_hub_models/models/ffnet_40s_quantized/README.md index 7767cf30..b730ceb5 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/README.md +++ 
b/qai_hub_models/models/ffnet_40s_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-40S-Quantized can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_40s_quantized/export.py b/qai_hub_models/models/ffnet_40s_quantized/export.py index 91d168bb..ad846a21 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/export.py +++ b/qai_hub_models/models/ffnet_40s_quantized/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -228,7 +228,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_40s_quantized/info.yaml b/qai_hub_models/models/ffnet_40s_quantized/info.yaml index 163abd5d..4c169ebc 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/info.yaml +++ b/qai_hub_models/models/ffnet_40s_quantized/info.yaml @@ -38,3 +38,4 @@ license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: - cityscapes +labels_file: cityscapes_labels.txt diff --git a/qai_hub_models/models/ffnet_40s_quantized/perf.yaml b/qai_hub_models/models/ffnet_40s_quantized/perf.yaml index 4413dde3..c8a1dcdd 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_40s_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: FFNet-40S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 6426.0 - throughput: 155.6178026766262 + inference_time: 6442.0 + throughput: 155.2312946289972 estimated_peak_memory_range: - min: 2113536 - max: 4414960 + min: 36864 + max: 1593576 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,22 +54,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: j1pvw27jg + job_id: jqpyv6vlp job_status: Passed torchscript_onnx_ort: - inference_time: 11412.0 - throughput: 87.62705923589205 + inference_time: 9268.0 + throughput: 107.89814415192059 estimated_peak_memory_range: - min: 27074560 - max: 55662472 + min: 7577600 + max: 25025832 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 94 + layers_on_npu: 92 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 94 - job_id: jnp18oj8g + total_layers: 92 + job_id: j1pvzv4rg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -78,13 +78,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.815172Z' + timestamp: '2024-06-08T22:30:02Z' - torchscript_onnx_tflite: - inference_time: 4740.0 - 
throughput: 210.9704641350211 + inference_time: 4682.0 + throughput: 213.58393848782572 estimated_peak_memory_range: - min: 16384 - max: 66762720 + min: 12288 + max: 67067712 primary_compute_unit: NPU precision: int8 layer_info: @@ -92,22 +92,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: j7gjl3qxp + job_id: j2p0elen5 job_status: Passed torchscript_onnx_ort: - inference_time: 8772.0 - throughput: 113.99908800729594 + inference_time: 7185.0 + throughput: 139.17884481558804 estimated_peak_memory_range: - min: 32075776 - max: 65683792 + min: 6955008 + max: 47776688 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 94 + layers_on_npu: 92 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 94 - job_id: jvgdv63rg + total_layers: 92 + job_id: j7gjke1e5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -116,13 +116,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.815222Z' + timestamp: '2024-06-08T22:30:03Z' - torchscript_onnx_tflite: - inference_time: 6448.0 - throughput: 155.08684863523573 + inference_time: 6401.0 + throughput: 156.22558975160132 estimated_peak_memory_range: min: 651264 - max: 2142088 + max: 2179136 primary_compute_unit: NPU precision: int8 layer_info: @@ -130,7 +130,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: jlpev6y15 + job_id: j1p8wzwop job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -139,13 +139,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.815247Z' + timestamp: '2024-06-08T22:29:54Z' - torchscript_onnx_tflite: - inference_time: 34910.0 - throughput: 28.64508736751647 + inference_time: 35462.0 + throughput: 28.199199142744344 estimated_peak_memory_range: - min: 131072 - max: 37241984 + min: 163840 + max: 38805968 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,7 +153,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: jygz7znkp + job_id: jogkr3rn5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -162,13 +162,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:40.815272Z' + timestamp: '2024-06-08T22:29:55Z' - torchscript_onnx_tflite: - inference_time: 189525.0 - throughput: 5.276348766653475 + inference_time: 189203.0 + throughput: 5.285328456736944 estimated_peak_memory_range: - min: 827392 - max: 2998544 + min: 835584 + max: 9440536 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,7 +176,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: jz5w9y76p + job_id: jn5q939op job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -185,21 +185,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:40.815296Z' + timestamp: '2024-06-08T22:29:56Z' - torchscript_onnx_ort: - inference_time: 10833.0 - throughput: 92.31053263177328 + inference_time: 8436.0 + throughput: 118.53959222380276 estimated_peak_memory_range: - min: 25223168 - max: 25223168 + min: 23719936 + max: 23719936 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 94 + layers_on_npu: 92 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 94 - job_id: jz57do4v5 + total_layers: 92 + job_id: jlpe4k2v5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -208,4 +208,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: 
'2024-05-29T18:59:40.815322Z' + timestamp: '2024-06-08T22:30:04Z' diff --git a/qai_hub_models/models/ffnet_54s/README.md b/qai_hub_models/models/ffnet_54s/README.md index 6aea8fe7..4122507a 100644 --- a/qai_hub_models/models/ffnet_54s/README.md +++ b/qai_hub_models/models/ffnet_54s/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-54S can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_54s/export.py b/qai_hub_models/models/ffnet_54s/export.py index 78f844ff..a573a9f5 100644 --- a/qai_hub_models/models/ffnet_54s/export.py +++ b/qai_hub_models/models/ffnet_54s/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/ffnet_54s/info.yaml b/qai_hub_models/models/ffnet_54s/info.yaml index 846f0dd0..c40cad2c 100644 --- a/qai_hub_models/models/ffnet_54s/info.yaml +++ b/qai_hub_models/models/ffnet_54s/info.yaml @@ -36,3 +36,4 @@ license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: - cityscapes +labels_file: cityscapes_labels.txt diff --git a/qai_hub_models/models/ffnet_54s/perf.yaml b/qai_hub_models/models/ffnet_54s/perf.yaml index 2c8c14fb..e912f5b5 100644 --- a/qai_hub_models/models/ffnet_54s/perf.yaml +++ b/qai_hub_models/models/ffnet_54s/perf.yaml @@ -36,11 +36,11 @@ models: - name: FFNet-54S performance_metrics: - torchscript_onnx_tflite: - inference_time: 25448.0 - throughput: 39.295818924866396 + inference_time: 25403.0 + throughput: 39.365429280006296 estimated_peak_memory_range: - min: 2547712 - max: 5357880 + min: 4255744 + max: 6909008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: j0px1043g + job_id: jz5wmqxmg job_status: Passed torchscript_onnx_qnn: - inference_time: 19884.0 - throughput: 50.291691812512575 + inference_time: 20253.0 + throughput: 49.37540117513455 estimated_peak_memory_range: - min: 25227264 - max: 49363400 + min: 25219072 + max: 49749016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jopryx00g + job_id: jvgd7o0zg job_status: Passed torchscript_onnx_ort: - inference_time: 30303.0 - throughput: 33.000033000033 + inference_time: 30396.0 + throughput: 32.89906566653507 estimated_peak_memory_range: - min: 30199808 - max: 136386576 + min: 25182208 + max: 90860800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 177 - job_id: j1p87jxk5 + job_id: jvgd7o06g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: 
Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.847740Z' + timestamp: '2024-06-08T22:30:34Z' - torchscript_onnx_tflite: - inference_time: 18458.0 - throughput: 54.17705060136526 + inference_time: 18529.0 + throughput: 53.96945328943818 estimated_peak_memory_range: - min: 2494464 - max: 110603872 + min: 2461696 + max: 110619440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jo5mz9mdp + job_id: jmg99w88g job_status: Passed torchscript_onnx_qnn: - inference_time: 14595.0 - throughput: 68.51661527920521 + inference_time: 14443.0 + throughput: 69.23769300006924 estimated_peak_memory_range: - min: 21004288 - max: 88103104 + min: 20983808 + max: 91014848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jep2mowr5 + job_id: jz5wmqx4g job_status: Passed torchscript_onnx_ort: - inference_time: 22562.0 - throughput: 44.32231185178619 + inference_time: 23366.0 + throughput: 42.79722673970727 estimated_peak_memory_range: - min: 29396992 - max: 70535856 + min: 29618176 + max: 74645360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 177 - job_id: jogky64wp + job_id: jz57vx6n5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.847824Z' + timestamp: '2024-06-08T22:30:35Z' - torchscript_onnx_tflite: - inference_time: 25423.0 - throughput: 39.33446092121307 + inference_time: 25775.0 + throughput: 38.797284190106694 estimated_peak_memory_range: - min: 2543616 - max: 4749104 + min: 2547712 + max: 5263000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jegne1nkg + job_id: jnp1qe37g job_status: Passed torchscript_onnx_qnn: - inference_time: 20236.0 - throughput: 49.4168808064835 + inference_time: 20126.0 + throughput: 49.686972075921695 estimated_peak_memory_range: - min: 25235456 - max: 45160432 + min: 25214976 + max: 40883168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: j2p0roj9p + job_id: jnp1qe3ng job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.847877Z' + timestamp: '2024-06-08T22:30:33Z' - torchscript_onnx_qnn: - inference_time: 25826.0 - throughput: 38.72066909316193 + inference_time: 25735.0 + throughput: 38.857586943850784 estimated_peak_memory_range: min: 25219072 max: 25219072 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jqpyd8x8p + job_id: jmg99w8mg job_status: Passed torchscript_onnx_ort: - inference_time: 29590.0 - throughput: 33.795201081446436 + inference_time: 29431.0 + throughput: 33.97777853283952 estimated_peak_memory_range: - min: 25219072 - max: 25219072 + min: 25223168 + max: 25223168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 177 - job_id: jn5q24yn5 + job_id: jqp4jv82p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: 
os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.847941Z' + timestamp: '2024-06-08T22:30:36Z' diff --git a/qai_hub_models/models/ffnet_54s_quantized/README.md b/qai_hub_models/models/ffnet_54s_quantized/README.md index 9f4d0a9c..5ab17ab3 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/README.md +++ b/qai_hub_models/models/ffnet_54s_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-54S-Quantized can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_54s_quantized/export.py b/qai_hub_models/models/ffnet_54s_quantized/export.py index ec7c47fc..f16f2f1a 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/export.py +++ b/qai_hub_models/models/ffnet_54s_quantized/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -228,7 +228,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_54s_quantized/info.yaml b/qai_hub_models/models/ffnet_54s_quantized/info.yaml index a7f45fd7..60940868 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/info.yaml +++ b/qai_hub_models/models/ffnet_54s_quantized/info.yaml @@ -38,3 +38,4 @@ license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: - cityscapes +labels_file: cityscapes_labels.txt diff --git a/qai_hub_models/models/ffnet_54s_quantized/perf.yaml b/qai_hub_models/models/ffnet_54s_quantized/perf.yaml index 1b654591..31883584 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_54s_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: FFNet-54S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 7120.0 - throughput: 140.4494382022472 + inference_time: 7119.0 + throughput: 140.4691670178396 estimated_peak_memory_range: - min: 2142208 - max: 10612096 + min: 688128 + max: 2335176 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,22 +54,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jw561o76p + job_id: jo5mv3475 job_status: Passed torchscript_onnx_ort: - inference_time: 11873.0 - throughput: 84.22471153036301 + inference_time: 9678.0 + throughput: 103.32713370531101 estimated_peak_memory_range: - min: 30167040 - max: 64592624 + min: 7581696 + max: 40900680 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 115 + layers_on_npu: 113 layers_on_gpu: 0 layers_on_cpu: 0 - 
total_layers: 115 - job_id: jvgdv6drg + total_layers: 113 + job_id: j1gle3o2p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -78,13 +78,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.882272Z' + timestamp: '2024-06-08T22:31:28Z' - torchscript_onnx_tflite: - inference_time: 5175.0 - throughput: 193.23671497584542 + inference_time: 5120.0 + throughput: 195.3125 estimated_peak_memory_range: - min: 241664 - max: 74191168 + min: 45056 + max: 74881936 primary_compute_unit: NPU precision: int8 layer_info: @@ -92,22 +92,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: j1p3mo93g + job_id: jegnr3xj5 job_status: Passed torchscript_onnx_ort: - inference_time: 8976.0 - throughput: 111.40819964349376 + inference_time: 7395.0 + throughput: 135.2265043948614 estimated_peak_memory_range: - min: 18096128 - max: 52516464 + min: 5738496 + max: 42316048 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 115 + layers_on_npu: 113 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 115 - job_id: jz57doev5 + total_layers: 113 + job_id: jw56qnrng job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -116,13 +116,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.882322Z' + timestamp: '2024-06-08T22:31:29Z' - torchscript_onnx_tflite: inference_time: 7096.0 throughput: 140.92446448703495 estimated_peak_memory_range: - min: 655360 - max: 2310944 + min: 61440 + max: 14772576 primary_compute_unit: NPU precision: int8 layer_info: @@ -130,7 +130,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jwgovdrq5 + job_id: jopr1e9kg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -139,13 +139,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.882348Z' + timestamp: '2024-06-08T22:31:20Z' - torchscript_onnx_tflite: - inference_time: 39841.0 - throughput: 25.09977159207851 + inference_time: 39816.0 + throughput: 25.11553144464537 estimated_peak_memory_range: - min: 12288 - max: 39329584 + min: 122880 + max: 41244048 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,7 +153,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: j1pvw2lkg + job_id: jep23lj6g job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -162,13 +162,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:40.882373Z' + timestamp: '2024-06-08T22:31:21Z' - torchscript_onnx_tflite: - inference_time: 204729.0 - throughput: 4.884505858964777 + inference_time: 203928.0 + throughput: 4.903691498960417 estimated_peak_memory_range: - min: 237568 - max: 7136480 + min: 225280 + max: 7415104 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,7 +176,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: j7gjl3rvp + job_id: jqpyv6n0p job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -185,21 +185,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:40.882397Z' + timestamp: '2024-06-08T22:31:22Z' - torchscript_onnx_ort: - inference_time: 11333.0 - throughput: 88.23788934968675 + inference_time: 8994.0 + throughput: 111.185234600845 estimated_peak_memory_range: - min: 25227264 - max: 25227264 + min: 6340608 + max: 6340608 primary_compute_unit: NPU precision: int8 layer_info: - 
layers_on_npu: 115 + layers_on_npu: 113 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 115 - job_id: jqp4wey8g + total_layers: 113 + job_id: j1p3qexm5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -208,4 +208,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.882423Z' + timestamp: '2024-06-08T22:31:30Z' diff --git a/qai_hub_models/models/ffnet_78s/README.md b/qai_hub_models/models/ffnet_78s/README.md index c3f6b6dc..c3d9f2d0 100644 --- a/qai_hub_models/models/ffnet_78s/README.md +++ b/qai_hub_models/models/ffnet_78s/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-78S can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_78s/export.py b/qai_hub_models/models/ffnet_78s/export.py index 01600621..f0bec0bf 100644 --- a/qai_hub_models/models/ffnet_78s/export.py +++ b/qai_hub_models/models/ffnet_78s/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/ffnet_78s/info.yaml b/qai_hub_models/models/ffnet_78s/info.yaml index 1f1d8cf9..233239ad 100644 --- a/qai_hub_models/models/ffnet_78s/info.yaml +++ b/qai_hub_models/models/ffnet_78s/info.yaml @@ -36,3 +36,4 @@ license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: - cityscapes +labels_file: cityscapes_labels.txt diff --git a/qai_hub_models/models/ffnet_78s/perf.yaml b/qai_hub_models/models/ffnet_78s/perf.yaml index 6c7da0ea..1d0087dd 100644 --- a/qai_hub_models/models/ffnet_78s/perf.yaml +++ b/qai_hub_models/models/ffnet_78s/perf.yaml @@ -36,11 +36,11 @@ models: - name: FFNet-78S performance_metrics: - torchscript_onnx_tflite: - inference_time: 29028.0 - throughput: 34.44949703734326 + inference_time: 29896.0 + throughput: 33.44929087503345 estimated_peak_memory_range: - min: 2166784 - max: 4957752 + min: 2584576 + max: 5177832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jo5mz90dp + job_id: j1pvzvezg job_status: Passed torchscript_onnx_qnn: - inference_time: 24289.0 - throughput: 41.170900407591915 + inference_time: 23500.0 + throughput: 42.5531914893617 estimated_peak_memory_range: - min: 25210880 - max: 56144640 + min: 25223168 + max: 55846352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jep2morr5 + job_id: jygzvr84p job_status: Passed torchscript_onnx_ort: - inference_time: 33942.0 - throughput: 29.46202345177067 + inference_time: 34791.0 + throughput: 28.743065735391337 estimated_peak_memory_range: - min: 
30212096 - max: 151965672 + min: 31657984 + max: 174636584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: jogky62wp + job_id: jvgd7o86g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.915116Z' + timestamp: '2024-06-08T22:32:03Z' - torchscript_onnx_tflite: - inference_time: 21499.0 - throughput: 46.513791339132055 + inference_time: 21247.0 + throughput: 47.065468066079916 estimated_peak_memory_range: - min: 1843200 - max: 120056032 + min: 684032 + max: 120904016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jegne1zkg + job_id: j7gjkeo15 job_status: Passed torchscript_onnx_qnn: - inference_time: 17622.0 - throughput: 56.74724775848372 + inference_time: 17520.0 + throughput: 57.077625570776256 estimated_peak_memory_range: - min: 20983808 - max: 102064896 + min: 21012480 + max: 102988784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jqpyd8o8p + job_id: jz5wmq84g job_status: Passed torchscript_onnx_ort: - inference_time: 26773.0 - throughput: 37.35106263773204 + inference_time: 25762.0 + throughput: 38.816862044872295 estimated_peak_memory_range: - min: 29380608 - max: 82049232 + min: 31490048 + max: 82980160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: jn5q24ln5 + job_id: jz57vxkn5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.915207Z' + timestamp: '2024-06-08T22:32:04Z' - torchscript_onnx_tflite: - inference_time: 29503.0 - throughput: 33.89485815001864 + inference_time: 29131.0 + throughput: 34.327692149256805 estimated_peak_memory_range: - min: 2560000 - max: 5083704 + min: 2592768 + max: 5433672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jopryxl0g + job_id: jlpe4k885 job_status: Passed torchscript_onnx_qnn: - inference_time: 23855.0 - throughput: 41.919932928107315 + inference_time: 23774.0 + throughput: 42.06275763439051 estimated_peak_memory_range: - min: 25219072 - max: 47387568 + min: 27922432 + max: 51160616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j1p87jek5 + job_id: jnp1qe7ng job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.915264Z' + timestamp: '2024-06-08T22:32:02Z' - torchscript_onnx_qnn: - inference_time: 32527.0 - throughput: 30.743689857656715 + inference_time: 32569.0 + throughput: 30.70404372255826 estimated_peak_memory_range: min: 25214976 max: 25214976 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j2p0rom9p + job_id: jmg99wkmg job_status: Passed torchscript_onnx_ort: - inference_time: 33218.0 - throughput: 30.104160394966584 + inference_time: 33100.0 + throughput: 30.211480362537763 
estimated_peak_memory_range: - min: 34959360 - max: 34959360 + min: 25219072 + max: 25219072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: j1glkwyjp + job_id: jqp4jvm2p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.915328Z' + timestamp: '2024-06-08T22:32:05Z' diff --git a/qai_hub_models/models/ffnet_78s_lowres/README.md b/qai_hub_models/models/ffnet_78s_lowres/README.md index ac546964..306f938a 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/README.md +++ b/qai_hub_models/models/ffnet_78s_lowres/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-78S-LowRes can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_78s_lowres/export.py b/qai_hub_models/models/ffnet_78s_lowres/export.py index b0997e75..1ee8b996 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/export.py +++ b/qai_hub_models/models/ffnet_78s_lowres/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/ffnet_78s_lowres/info.yaml b/qai_hub_models/models/ffnet_78s_lowres/info.yaml index c50bd7cc..ffded2ec 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/info.yaml +++ b/qai_hub_models/models/ffnet_78s_lowres/info.yaml @@ -37,3 +37,4 @@ license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: - cityscapes +labels_file: cityscapes_labels.txt diff --git a/qai_hub_models/models/ffnet_78s_lowres/perf.yaml b/qai_hub_models/models/ffnet_78s_lowres/perf.yaml index aaa178ed..9f48808e 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/perf.yaml +++ b/qai_hub_models/models/ffnet_78s_lowres/perf.yaml @@ -36,11 +36,11 @@ models: - name: FFNet-78S-LowRes performance_metrics: - torchscript_onnx_tflite: - inference_time: 10790.0 - throughput: 92.67840593141798 + inference_time: 10698.0 + throughput: 93.47541596560104 estimated_peak_memory_range: - min: 638976 - max: 2877104 + min: 12288 + max: 8183320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: j1p3moz3g + job_id: jo5mv3o75 job_status: Passed torchscript_onnx_qnn: - inference_time: 11359.0 - throughput: 88.03591865481117 + inference_time: 11228.0 + throughput: 89.06305664410402 estimated_peak_memory_range: - min: 2330624 - max: 55394032 + min: 2109440 + max: 55500544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 
- job_id: j7gjl33vp + job_id: jep23l46g job_status: Passed torchscript_onnx_ort: - inference_time: 8843.0 - throughput: 113.0837950921633 + inference_time: 8904.0 + throughput: 112.30907457322552 estimated_peak_memory_range: - min: 2228224 - max: 123699768 + min: 1257472 + max: 128438216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 238 - job_id: jmg94oow5 + job_id: jogkr39v5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.949492Z' + timestamp: '2024-06-08T22:32:36Z' - torchscript_onnx_tflite: - inference_time: 7636.0 - throughput: 130.95861707700368 + inference_time: 7663.0 + throughput: 130.49719431032233 estimated_peak_memory_range: - min: 45056 - max: 52202592 + min: 159744 + max: 55453776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jwgovdlq5 + job_id: jegnr3oj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 7922.0 - throughput: 126.23074981065388 + inference_time: 7958.0 + throughput: 125.65971349585323 estimated_peak_memory_range: min: 6307840 - max: 73285696 + max: 77174624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jlpev66o5 + job_id: jqpyv6q0p job_status: Passed torchscript_onnx_ort: - inference_time: 6682.0 - throughput: 149.655791679138 + inference_time: 6766.0 + throughput: 147.79781259237365 estimated_peak_memory_range: - min: 6246400 - max: 48859696 + min: 6307840 + max: 49412144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 238 - job_id: jnp18oo8g + job_id: jn5q93mep job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.949582Z' - - torchscript_onnx_qnn: - inference_time: 11366.0 - throughput: 87.98169980644026 + timestamp: '2024-06-08T22:32:37Z' + - torchscript_onnx_tflite: + inference_time: 10676.0 + throughput: 93.66804046459347 + estimated_peak_memory_range: + min: 569344 + max: 2852616 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 149 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 149 + job_id: jopr1eokg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 11306.0 + throughput: 88.44861135680169 estimated_peak_memory_range: - min: 32768 - max: 52846864 + min: 16384 + max: 52829760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jz5w9yy3p + job_id: j1p8wz4qp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -163,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.949622Z' + timestamp: '2024-06-08T22:32:35Z' - torchscript_onnx_qnn: - inference_time: 20343.0 - throughput: 49.1569581674286 + inference_time: 20526.0 + throughput: 48.718698236383126 estimated_peak_memory_range: min: 6303744 max: 6303744 @@ -177,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jygz7zzop + job_id: j2p0elv05 job_status: Passed torchscript_onnx_ort: - inference_time: 8732.0 - 
throughput: 114.52130096197892 + inference_time: 8769.0 + throughput: 114.03808872163303 estimated_peak_memory_range: - min: 40693760 - max: 40693760 + min: 30912512 + max: 30912512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 238 - job_id: jvgdv66rg + job_id: j1gle312p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -201,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.949692Z' + timestamp: '2024-06-08T22:32:38Z' diff --git a/qai_hub_models/models/ffnet_78s_quantized/README.md b/qai_hub_models/models/ffnet_78s_quantized/README.md index 43dcb2af..eaaccda1 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/README.md +++ b/qai_hub_models/models/ffnet_78s_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-78S-Quantized can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_78s_quantized/export.py b/qai_hub_models/models/ffnet_78s_quantized/export.py index 9b8ba13a..c09312bb 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/export.py +++ b/qai_hub_models/models/ffnet_78s_quantized/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -228,7 +228,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_78s_quantized/info.yaml b/qai_hub_models/models/ffnet_78s_quantized/info.yaml index 86f97e34..7e6703e9 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/info.yaml +++ b/qai_hub_models/models/ffnet_78s_quantized/info.yaml @@ -38,3 +38,4 @@ license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: - cityscapes +labels_file: cityscapes_labels.txt diff --git a/qai_hub_models/models/ffnet_78s_quantized/perf.yaml b/qai_hub_models/models/ffnet_78s_quantized/perf.yaml index b26e69f8..9f134ee8 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_78s_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: FFNet-78S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 8341.0 - throughput: 119.88970147464333 + inference_time: 8325.0 + throughput: 120.12012012012012 estimated_peak_memory_range: - min: 688128 - max: 2360856 + min: 663552 + max: 8732048 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,22 +54,22 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 154 - job_id: jqp4wee8g + job_id: j1p3qewm5 job_status: Passed torchscript_onnx_ort: - inference_time: 12055.0 - throughput: 82.9531314807134 + inference_time: 9764.0 + throughput: 102.41704219582138 estimated_peak_memory_range: - min: 30191616 - max: 78085232 + min: 7573504 + max: 52534152 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 151 + layers_on_npu: 149 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 151 - job_id: jn5q244n5 + total_layers: 149 + job_id: jvgd7oz6g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -78,13 +78,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.983404Z' + timestamp: '2024-06-08T22:33:40Z' - torchscript_onnx_tflite: - inference_time: 6017.0 - throughput: 166.19577862722286 + inference_time: 6002.0 + throughput: 166.61112962345885 estimated_peak_memory_range: - min: 16384 - max: 86811680 + min: 57344 + max: 86915504 primary_compute_unit: NPU precision: int8 layer_info: @@ -92,22 +92,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: j0px1003g + job_id: jwgoe341p job_status: Passed torchscript_onnx_ort: - inference_time: 9813.0 - throughput: 101.9056353816366 + inference_time: 7233.0 + throughput: 138.25521913452232 estimated_peak_memory_range: - min: 28508160 - max: 75455072 + min: 8347648 + max: 53601040 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 151 + layers_on_npu: 149 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 151 - job_id: j1glkwwjp + total_layers: 149 + job_id: jz57vx7n5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -116,13 +116,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.983462Z' + timestamp: '2024-06-08T22:33:41Z' - torchscript_onnx_tflite: - inference_time: 8357.0 - throughput: 119.66016513102788 + inference_time: 8359.0 + throughput: 119.63153487259241 estimated_peak_memory_range: - min: 753664 - max: 2531424 + min: 679936 + max: 2337912 primary_compute_unit: NPU precision: int8 layer_info: @@ -130,7 +130,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: jo5mz99dp + job_id: j1pvzv9zg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -139,13 +139,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.983492Z' + timestamp: '2024-06-08T22:33:33Z' - torchscript_onnx_tflite: - inference_time: 44085.0 - throughput: 22.683452421458547 + inference_time: 44458.0 + throughput: 22.49313959242431 estimated_peak_memory_range: - min: 786432 - max: 43758368 + min: 729088 + max: 44729792 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,7 +153,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: jegne11kg + job_id: j7gjkew15 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -162,13 +162,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:40.983523Z' + timestamp: '2024-06-08T22:33:33Z' - torchscript_onnx_tflite: - inference_time: 216166.0 - throughput: 4.626074405780742 + inference_time: 219858.0 + throughput: 4.548390324664101 estimated_peak_memory_range: - min: 880640 - max: 7838848 + min: 393216 + max: 2901200 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,7 +176,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: jopryxx0g + job_id: 
jlpe4kl85 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -185,21 +185,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:40.983553Z' + timestamp: '2024-06-08T22:33:34Z' - torchscript_onnx_ort: - inference_time: 11523.0 - throughput: 86.78295582747549 + inference_time: 9426.0 + throughput: 106.08953957139826 estimated_peak_memory_range: - min: 34738176 - max: 34738176 + min: 5931008 + max: 5931008 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 151 + layers_on_npu: 149 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 151 - job_id: jw561oo6p + total_layers: 149 + job_id: jqp4jv92p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -208,4 +208,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.983583Z' + timestamp: '2024-06-08T22:33:42Z' diff --git a/qai_hub_models/models/googlenet/README.md b/qai_hub_models/models/googlenet/README.md index 71a8d343..214ae1f8 100644 --- a/qai_hub_models/models/googlenet/README.md +++ b/qai_hub_models/models/googlenet/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of GoogLeNet can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Going Deeper with Convolutions](https://arxiv.org/abs/1409.4842) diff --git a/qai_hub_models/models/googlenet/evaluate.py b/qai_hub_models/models/googlenet/evaluate.py new file mode 100644 index 00000000..70a3da92 --- /dev/null +++ b/qai_hub_models/models/googlenet/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.googlenet import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/googlenet/export.py b/qai_hub_models/models/googlenet/export.py index a369bd62..e611b7d9 100644 --- a/qai_hub_models/models/googlenet/export.py +++ b/qai_hub_models/models/googlenet/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/googlenet/info.yaml b/qai_hub_models/models/googlenet/info.yaml index e3143397..c7e0ca3d 100644 --- a/qai_hub_models/models/googlenet/info.yaml +++ b/qai_hub_models/models/googlenet/info.yaml @@ -39,3 +39,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/googlenet/perf.yaml b/qai_hub_models/models/googlenet/perf.yaml index 20bc3165..46726a11 100644 --- a/qai_hub_models/models/googlenet/perf.yaml +++ b/qai_hub_models/models/googlenet/perf.yaml @@ -36,11 +36,11 @@ models: - name: GoogLeNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 1051.0 - throughput: 951.4747859181732 + inference_time: 1052.0 + throughput: 950.5703422053232 estimated_peak_memory_range: - min: 36864 - max: 17333992 + min: 73728 + max: 1671408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jopryxd0g + job_id: j7gjkeq15 job_status: Passed torchscript_onnx_qnn: - inference_time: 1087.0 - throughput: 919.9632014719411 + inference_time: 1088.0 + throughput: 919.1176470588235 estimated_peak_memory_range: - min: 0 - max: 4362392 + min: 16384 + max: 26332424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: j2p0ro99p + job_id: jz5wmq44g job_status: Passed torchscript_onnx_ort: - inference_time: 1256.0 - throughput: 796.1783439490446 + inference_time: 1306.0 + throughput: 765.6967840735069 estimated_peak_memory_range: - min: 16384 - max: 56215984 + min: 81920 + max: 33177416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j1glkw8jp + job_id: jz57vx9n5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.061712Z' + timestamp: '2024-06-08T22:34:47Z' - torchscript_onnx_tflite: - inference_time: 674.0 - throughput: 1483.679525222552 + inference_time: 686.0 + throughput: 1457.725947521866 estimated_peak_memory_range: - min: 12288 - max: 46434032 + min: 16384 + max: 47804608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jep2modr5 + job_id: jlpe4ky85 job_status: Passed torchscript_onnx_qnn: - inference_time: 695.0 - throughput: 1438.8489208633093 + inference_time: 700.0 + throughput: 1428.5714285714287 estimated_peak_memory_range: - min: 618496 - max: 58056704 + min: 0 + max: 53870528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: j1p87jrk5 + job_id: jmg99wdmg job_status: Passed torchscript_onnx_ort: - inference_time: 834.0 - throughput: 1199.0407673860911 + inference_time: 828.0 + throughput: 1207.729468599034 estimated_peak_memory_range: - min: 0 - max: 29165392 + min: 618496 + max: 31247424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jw561om6p + job_id: jqp4jv32p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.061783Z' + timestamp: '2024-06-08T22:34:48Z' - torchscript_onnx_tflite: - inference_time: 1047.0 - throughput: 955.1098376313277 + inference_time: 1048.0 + throughput: 954.1984732824427 estimated_peak_memory_range: - min: 20480 - max: 2243656 + min: 40960 + max: 17749600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jqpyd828p + job_id: jygzvrn4p job_status: Passed torchscript_onnx_qnn: - inference_time: 1099.0 - throughput: 909.9181073703367 + inference_time: 1098.0 + throughput: 910.7468123861566 estimated_peak_memory_range: - min: 12288 - max: 26385480 + min: 491520 + max: 26782184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jn5q241n5 + job_id: jvgd7o26g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.061828Z' + timestamp: '2024-06-08T22:34:46Z' - torchscript_onnx_qnn: - inference_time: 1276.0 - throughput: 783.6990595611285 + inference_time: 1266.0 + throughput: 789.8894154818325 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jogky60wp + job_id: jnp1qe6ng job_status: Passed torchscript_onnx_ort: - inference_time: 1316.0 - throughput: 759.8784194528876 + inference_time: 1388.0 + throughput: 720.4610951008646 estimated_peak_memory_range: - min: 15437824 - max: 15437824 + min: 671744 + max: 671744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - 
job_id: j1p3mo73g + job_id: j0pxeyx85 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.061878Z' + timestamp: '2024-06-08T22:34:49Z' diff --git a/qai_hub_models/models/googlenet_quantized/README.md b/qai_hub_models/models/googlenet_quantized/README.md index cd504476..91e33b0b 100644 --- a/qai_hub_models/models/googlenet_quantized/README.md +++ b/qai_hub_models/models/googlenet_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of GoogLeNetQuantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Going Deeper with Convolutions](https://arxiv.org/abs/1409.4842) diff --git a/qai_hub_models/models/googlenet_quantized/evaluate.py b/qai_hub_models/models/googlenet_quantized/evaluate.py new file mode 100644 index 00000000..0e8be6d5 --- /dev/null +++ b/qai_hub_models/models/googlenet_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.googlenet_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/googlenet_quantized/export.py b/qai_hub_models/models/googlenet_quantized/export.py index 616b2243..c9504b86 100644 --- a/qai_hub_models/models/googlenet_quantized/export.py +++ b/qai_hub_models/models/googlenet_quantized/export.py @@ -196,7 +196,7 @@ def 
export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/googlenet_quantized/info.yaml b/qai_hub_models/models/googlenet_quantized/info.yaml index 0d5b5538..866df2a0 100644 --- a/qai_hub_models/models/googlenet_quantized/info.yaml +++ b/qai_hub_models/models/googlenet_quantized/info.yaml @@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/googlenet_quantized/model.py b/qai_hub_models/models/googlenet_quantized/model.py index 8c9b76d6..e41ef8c4 100644 --- a/qai_hub_models/models/googlenet_quantized/model.py +++ b/qai_hub_models/models/googlenet_quantized/model.py @@ -22,7 +22,7 @@ from qai_hub_models.utils.asset_loaders import CachedWebModelAsset from qai_hub_models.utils.quantization_aimet import ( constrain_quantized_inputs_to_image_range, - tie_aimet_observer_groups, + tie_observers, ) MODEL_ID = __name__.split(".")[-2] @@ -72,7 +72,7 @@ def from_pretrained( config_file=get_default_aimet_config(), dummy_input=torch.rand(input_shape), ) - cls._tie_pre_concat_quantizers(sim) + tie_observers(sim) constrain_quantized_inputs_to_image_range(sim) if aimet_encodings: @@ -84,36 +84,3 @@ def from_pretrained( sim.model.eval() return cls(sim) - - @classmethod - def _tie_pre_concat_quantizers(cls, sim: QuantizationSimModel): - """ - This ties together the output quantizers prior to concatenations. This - prevents unnecessary re-quantization during the concatenation. 
- """ - blocks = [ - sim.model.net.inception3a, - sim.model.net.inception3b, - sim.model.net.inception4a, - sim.model.net.inception4b, - sim.model.net.inception4c, - sim.model.net.inception4d, - sim.model.net.inception4e, - sim.model.net.inception5a, - sim.model.net.inception5b, - ] - - idx = 3 - groups = [] - for block in blocks: - groups.append( - [ - getattr(block.branch1, f"module_relu_{idx}"), - getattr(getattr(block.branch2, "1"), f"module_relu_{idx+2}"), - getattr(getattr(block.branch3, "1"), f"module_relu_{idx+4}"), - getattr(getattr(block.branch4, "1"), f"module_relu_{idx+5}"), - ] - ) - idx += 6 - - tie_aimet_observer_groups(groups) diff --git a/qai_hub_models/models/googlenet_quantized/perf.yaml b/qai_hub_models/models/googlenet_quantized/perf.yaml index 9d5cd175..d51b481d 100644 --- a/qai_hub_models/models/googlenet_quantized/perf.yaml +++ b/qai_hub_models/models/googlenet_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: GoogLeNetQuantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 296.0 - throughput: 3378.3783783783783 + inference_time: 298.0 + throughput: 3355.7046979865772 estimated_peak_memory_range: - min: 12288 - max: 1422272 + min: 20480 + max: 1284320 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: j1pvw2nkg + job_id: jegnr3kj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 337.0 - throughput: 2967.359050445104 + inference_time: 342.0 + throughput: 2923.9766081871344 estimated_peak_memory_range: - min: 12288 - max: 4317312 + min: 16384 + max: 10406440 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jmg94oqw5 + job_id: j1p8wzxqp job_status: Passed torchscript_onnx_ort: - inference_time: 607.0 - throughput: 1647.4464579901153 + inference_time: 523.0 + throughput: 1912.0458891013384 estimated_peak_memory_range: min: 12288 - max: 21096120 + max: 12422920 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 94 + layers_on_npu: 91 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 94 - job_id: jnp18om7g + total_layers: 91 + job_id: jw56qn7ng job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.096189Z' + timestamp: '2024-06-08T22:35:33Z' - torchscript_onnx_tflite: - inference_time: 215.0 - throughput: 4651.162790697675 + inference_time: 237.0 + throughput: 4219.4092827004215 estimated_peak_memory_range: min: 12288 - max: 33407968 + max: 34025648 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: j7gjl38vp + job_id: jopr1ewkg job_status: Passed torchscript_onnx_qnn: - inference_time: 243.0 - throughput: 4115.22633744856 + inference_time: 244.0 + throughput: 4098.360655737705 estimated_peak_memory_range: - min: 159744 - max: 43857088 + min: 0 + max: 42694240 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jnp18om8g + job_id: jogkr34v5 job_status: Passed torchscript_onnx_ort: - inference_time: 454.0 - throughput: 2202.643171806167 + inference_time: 393.0 + throughput: 2544.529262086514 estimated_peak_memory_range: - min: 581632 - max: 28507056 + min: 12288 + max: 30491248 primary_compute_unit: NPU 
precision: int8 layer_info: - layers_on_npu: 94 + layers_on_npu: 91 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 94 - job_id: jvgdv6mzg + total_layers: 91 + job_id: j1p3qe9m5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.096253Z' + timestamp: '2024-06-08T22:35:34Z' - torchscript_onnx_tflite: inference_time: 298.0 throughput: 3355.7046979865772 estimated_peak_memory_range: - min: 12288 - max: 9463360 + min: 20480 + max: 1812976 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jlpev6no5 + job_id: jep23le6g job_status: Passed torchscript_onnx_qnn: - inference_time: 339.0 - throughput: 2949.8525073746314 + inference_time: 335.0 + throughput: 2985.0746268656717 estimated_peak_memory_range: - min: 28672 - max: 10150424 + min: 167936 + max: 10553224 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jz5w9yrmp + job_id: j1gle3x2p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.096292Z' + timestamp: '2024-06-08T22:35:32Z' - torchscript_onnx_tflite: - inference_time: 977.0 - throughput: 1023.5414534288639 + inference_time: 964.0 + throughput: 1037.344398340249 estimated_peak_memory_range: min: 12288 - max: 17947280 + max: 18322160 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jygz7z0op - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1026.0 - throughput: 974.6588693957115 - estimated_peak_memory_range: - min: 163840 - max: 38102800 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 86 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 86 - job_id: jmg94oq85 + job_id: jqpyv6m0p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:41.096330Z' + timestamp: '2024-06-08T22:35:26Z' - torchscript_onnx_tflite: - inference_time: 5627.0 - throughput: 177.7145903678692 + inference_time: 5711.0 + throughput: 175.1006828926633 estimated_peak_memory_range: - min: 12288 - max: 7365560 + min: 16384 + max: 2182760 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jz5w9yr3p + job_id: j2p0elj05 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:41.096352Z' + timestamp: '2024-06-08T22:35:27Z' - torchscript_onnx_qnn: - inference_time: 461.0 - throughput: 2169.1973969631235 + inference_time: 438.0 + throughput: 2283.10502283105 estimated_peak_memory_range: - min: 516096 - max: 516096 + min: 536576 + max: 536576 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jvgdv6mrg + job_id: jn5q93yep job_status: Passed torchscript_onnx_ort: - inference_time: 645.0 - throughput: 1550.3875968992247 + inference_time: 526.0 + throughput: 1901.1406844106464 estimated_peak_memory_range: - 
min: 18911232 - max: 18911232 + min: 11812864 + max: 11812864 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 94 + layers_on_npu: 91 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 94 - job_id: jz57do895 + total_layers: 91 + job_id: jwgoe3r1p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.096393Z' + timestamp: '2024-06-08T22:35:35Z' diff --git a/qai_hub_models/models/hrnet_pose/README.md b/qai_hub_models/models/hrnet_pose/README.md index 1291e266..d858ca38 100644 --- a/qai_hub_models/models/hrnet_pose/README.md +++ b/qai_hub_models/models/hrnet_pose/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of HRNetPose can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Deep High-Resolution Representation Learning for Human Pose Estimation](https://arxiv.org/abs/1902.09212) diff --git a/qai_hub_models/models/hrnet_pose/export.py b/qai_hub_models/models/hrnet_pose/export.py index c4f04ec8..6853ea8f 100644 --- a/qai_hub_models/models/hrnet_pose/export.py +++ b/qai_hub_models/models/hrnet_pose/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/hrnet_pose/perf.yaml b/qai_hub_models/models/hrnet_pose/perf.yaml index b68df421..26ccc19f 100644 --- a/qai_hub_models/models/hrnet_pose/perf.yaml +++ b/qai_hub_models/models/hrnet_pose/perf.yaml @@ -39,8 +39,8 @@ models: inference_time: 2822.0 throughput: 354.3586109142452 estimated_peak_memory_range: - min: 20480 - max: 2735056 + min: 28672 + max: 2472016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: j0px10zlg + job_id: j7gjke715 job_status: Passed torchscript_onnx_qnn: - inference_time: 2884.0 - throughput: 346.74063800277395 + inference_time: 2908.0 + throughput: 343.878954607978 estimated_peak_memory_range: min: 16384 - max: 21559424 + max: 21168936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jopryx77g + job_id: jz5wmq74g job_status: Passed torchscript_onnx_ort: - inference_time: 3038.0 - throughput: 329.1639236339697 + inference_time: 3074.0 + throughput: 325.30904359141186 estimated_peak_memory_range: - min: 81920 - max: 141558952 + min: 12288 + max: 131380776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 749 - job_id: j1p87jko5 + job_id: jz5wmq7zg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - 
timestamp: '2024-05-29T18:59:41.139934Z' + timestamp: '2024-06-08T22:36:14Z' - torchscript_onnx_tflite: - inference_time: 2078.0 - throughput: 481.23195380173246 + inference_time: 2066.0 + throughput: 484.027105517909 estimated_peak_memory_range: min: 16384 - max: 108912400 + max: 109820208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jo5mz9l9p + job_id: jlpe4kz85 job_status: Passed torchscript_onnx_qnn: - inference_time: 2144.0 - throughput: 466.4179104477612 + inference_time: 2134.0 + throughput: 468.6035613870665 estimated_peak_memory_range: min: 606208 - max: 191567376 + max: 190071840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jep2mozq5 + job_id: jmg99wmmg job_status: Passed torchscript_onnx_ort: - inference_time: 2209.0 - throughput: 452.6935264825713 + inference_time: 2205.0 + throughput: 453.51473922902494 estimated_peak_memory_range: min: 12288 - max: 94173408 + max: 92302688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 749 - job_id: jogky6knp + job_id: jmg99wmqg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.140178Z' + timestamp: '2024-06-08T22:36:15Z' - torchscript_onnx_tflite: - inference_time: 2876.0 - throughput: 347.70514603616135 + inference_time: 2832.0 + throughput: 353.1073446327684 estimated_peak_memory_range: - min: 24576 - max: 3247784 + min: 28672 + max: 3094624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jegne1wqg + job_id: jygzvrm4p job_status: Passed torchscript_onnx_qnn: - inference_time: 2911.0 - throughput: 343.52456200618343 + inference_time: 2903.0 + throughput: 344.47123665173956 estimated_peak_memory_range: - min: 16384 - max: 20698712 + min: 12288 + max: 20792584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: j2p0roxnp + job_id: jvgd7o36g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.140315Z' + timestamp: '2024-06-08T22:36:13Z' - torchscript_onnx_qnn: - inference_time: 3152.0 - throughput: 317.25888324873097 + inference_time: 3132.0 + throughput: 319.28480204342276 estimated_peak_memory_range: - min: 589824 - max: 589824 + min: 897024 + max: 897024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jqpyd8ylp + job_id: jnp1qejng job_status: Passed torchscript_onnx_ort: - inference_time: 2994.0 - throughput: 334.001336005344 + inference_time: 2963.0 + throughput: 337.4957813027337 estimated_peak_memory_range: - min: 77676544 - max: 77676544 + min: 49115136 + max: 49115136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 749 - job_id: jn5q24do5 + job_id: jnp1qejkg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X 
Elite - timestamp: '2024-05-29T18:59:41.140494Z' + timestamp: '2024-06-08T22:36:16Z' diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/README.md b/qai_hub_models/models/huggingface_wavlm_base_plus/README.md index fc585abc..570e2312 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/README.md +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of HuggingFace-WavLM-Base-Plus can be found [here](https://github.com/microsoft/unilm/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [WavLM: Large-Scale Self-Supervised Pre-Training for Full Stack Speech Processing](https://arxiv.org/abs/2110.13900) diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/export.py b/qai_hub_models/models/huggingface_wavlm_base_plus/export.py index 5a4c60da..ff097a89 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/export.py +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/export.py @@ -171,7 +171,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -197,7 +197,12 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) + parser = export_parser( + model_cls=Model, + supports_qnn=False, + supports_ort=False, + supports_precompiled_ort=False, + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml b/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml index 48153308..cf928d5f 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml @@ -36,11 +36,11 @@ models: - name: HuggingFace-WavLM-Base-Plus performance_metrics: - torchscript_onnx_tflite: - inference_time: 950768.0 - throughput: 1.0517812968042677 + inference_time: 920916.0 + throughput: 1.085875367568812 estimated_peak_memory_range: - min: 140214272 - max: 142718344 + min: 147881984 + max: 155477640 primary_compute_unit: CPU precision: fp32 layer_info: @@ -48,7 +48,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 811 total_layers: 811 - job_id: jegne1dqg + job_id: jw56qn8vg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -57,13 +57,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.210607Z' + timestamp: '2024-06-08T22:39:46Z' - torchscript_onnx_tflite: - inference_time: 804134.0 - throughput: 1.243573832221993 + inference_time: 819047.0 + throughput: 1.220931155354943 estimated_peak_memory_range: - min: 149458944 - max: 186089136 + min: 148029440 + max: 185119104 primary_compute_unit: CPU precision: fp32 layer_info: @@ -71,7 +71,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 811 total_layers: 811 - job_id: jopryxm7g + job_id: j1p3qezx5 job_status: Passed 
reference_device_info: name: Samsung Galaxy S24 @@ -80,7 +80,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.210708Z' + timestamp: '2024-06-08T22:39:47Z' - torchscript_onnx_tflite: inference_time: 932003.0 throughput: 1.0729579196633487 @@ -103,12 +103,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.210802Z' - - reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows - manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.210809Z' + timestamp: '2024-05-23T16:02:38Z' diff --git a/qai_hub_models/models/inception_v3/README.md b/qai_hub_models/models/inception_v3/README.md index 0b085c5e..65bf345a 100644 --- a/qai_hub_models/models/inception_v3/README.md +++ b/qai_hub_models/models/inception_v3/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Inception-v3 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Rethinking the Inception Architecture for Computer Vision](http://arxiv.org/abs/1512.00567) diff --git a/qai_hub_models/models/inception_v3/evaluate.py b/qai_hub_models/models/inception_v3/evaluate.py new file mode 100644 index 00000000..4bf4f8d2 --- /dev/null +++ b/qai_hub_models/models/inception_v3/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.inception_v3 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/inception_v3/export.py b/qai_hub_models/models/inception_v3/export.py index 328357a4..868a0239 100644 --- a/qai_hub_models/models/inception_v3/export.py +++ b/qai_hub_models/models/inception_v3/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/inception_v3/info.yaml b/qai_hub_models/models/inception_v3/info.yaml index 69856a43..66ecf428 100644 --- a/qai_hub_models/models/inception_v3/info.yaml +++ b/qai_hub_models/models/inception_v3/info.yaml @@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/inception_v3/perf.yaml b/qai_hub_models/models/inception_v3/perf.yaml index 93777204..bf2b161e 100644 --- a/qai_hub_models/models/inception_v3/perf.yaml +++ b/qai_hub_models/models/inception_v3/perf.yaml @@ -36,11 +36,11 @@ models: - name: Inception-v3 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1356.0 - throughput: 737.4631268436578 + inference_time: 1355.0 + throughput: 738.0073800738007 estimated_peak_memory_range: min: 24576 - max: 2049096 + max: 2203288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: j1p3modng + job_id: jvgd7odkg job_status: Passed torchscript_onnx_qnn: - inference_time: 1412.0 - throughput: 708.2152974504249 + inference_time: 1424.0 + throughput: 702.2471910112359 estimated_peak_memory_range: - min: 12288 - max: 149845872 + min: 16384 + max: 150398664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: j7gjl39ep + job_id: j0pxeylj5 job_status: Passed torchscript_onnx_ort: - inference_time: 1729.0 - throughput: 578.368999421631 + inference_time: 1714.0 + throughput: 583.4305717619603 estimated_peak_memory_range: - min: 12288 - max: 205289944 + min: 24576 + max: 216921632 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jmg94or85 + job_id: jep23loxg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.228489Z' + timestamp: '2024-06-08T22:40:24Z' - torchscript_onnx_tflite: - inference_time: 1028.0 - throughput: 972.7626459143969 + inference_time: 1026.0 + throughput: 974.6588693957115 estimated_peak_memory_range: min: 12288 - max: 52149168 + max: 54111920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jwgovdxk5 + job_id: jz57vxeq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1045.0 - throughput: 956.9377990430622 + inference_time: 1055.0 + throughput: 947.8672985781991 estimated_peak_memory_range: - min: 618496 - max: 62520720 + min: 0 + max: 64200016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jlpev6qv5 + job_id: jo5mv30y5 job_status: Passed torchscript_onnx_ort: - inference_time: 1335.0 - throughput: 749.0636704119851 + inference_time: 1328.0 + throughput: 753.0120481927711 estimated_peak_memory_range: - min: 618496 - max: 34471808 + min: 0 + max: 33764336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jnp18o97g + job_id: jqpyv68rp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.228583Z' + timestamp: '2024-06-08T22:40:25Z' - torchscript_onnx_tflite: - inference_time: 1349.0 - throughput: 741.2898443291327 + inference_time: 1355.0 + throughput: 738.0073800738007 estimated_peak_memory_range: - min: 24576 - max: 2179216 + min: 16384 + max: 2130328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: j1pvw28rg + job_id: jqp4jvyqp job_status: Passed torchscript_onnx_qnn: - inference_time: 1404.0 - throughput: 712.2507122507122 + inference_time: 1411.0 + throughput: 708.7172218284904 estimated_peak_memory_range: - min: 69632 - max: 149743848 + min: 0 + max: 150030456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jz5w9ykmp + job_id: jopr1exvg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.228643Z' + timestamp: '2024-06-08T22:40:23Z' - torchscript_onnx_qnn: - inference_time: 1519.0 - throughput: 658.3278472679394 + inference_time: 1503.0 + throughput: 665.335994677312 estimated_peak_memory_range: - min: 602112 - max: 602112 + min: 1097728 + max: 1097728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jygz7z6xp + job_id: jegnr31v5 job_status: Passed torchscript_onnx_ort: - inference_time: 1677.0 - throughput: 596.3029218843172 + inference_time: 1640.0 + throughput: 609.7560975609756 estimated_peak_memory_range: - min: 47853568 - max: 47853568 + min: 39940096 + max: 39940096 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jvgdv6kzg + job_id: j2p0elo25 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.228713Z' + timestamp: '2024-06-08T22:40:26Z' diff --git a/qai_hub_models/models/inception_v3_quantized/README.md b/qai_hub_models/models/inception_v3_quantized/README.md index c9f4b556..b326f00a 100644 --- a/qai_hub_models/models/inception_v3_quantized/README.md +++ b/qai_hub_models/models/inception_v3_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Inception-v3-Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Rethinking the Inception Architecture for Computer Vision](http://arxiv.org/abs/1512.00567) diff --git a/qai_hub_models/models/inception_v3_quantized/evaluate.py b/qai_hub_models/models/inception_v3_quantized/evaluate.py index a4f88114..47341fcd 100644 --- a/qai_hub_models/models/inception_v3_quantized/evaluate.py +++ b/qai_hub_models/models/inception_v3_quantized/evaluate.py @@ -2,17 +2,21 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + from __future__ import annotations import warnings import qai_hub as hub -from qai_hub_models.models.inception_v3 import Model as FP16Model from qai_hub_models.models.inception_v3_quantized import MODEL_ID, Model from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel from qai_hub_models.utils.evaluate import evaluate_on_dataset from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin SUPPORTED_DATASETS = ["imagenette", "imagenet"] @@ -20,9 +24,12 @@ def main(): warnings.filterwarnings("ignore") parser = evaluate_parser( - model_cls=Model, default_split_size=2500, supported_datasets=SUPPORTED_DATASETS + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, ) args = parser.parse_args() + args.device = None if args.hub_model_id is not None: hub_model = hub.get_model(args.hub_model_id) @@ -30,8 +37,14 @@ def main(): hub_model = compile_model_from_args( MODEL_ID, args, get_model_kwargs(Model, vars(args)) ) - hub_device = get_hub_device(args.device, args.chipset) - torch_model = FP16Model.from_pretrained(**get_model_kwargs(FP16Model, vars(args))) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) evaluate_on_dataset( hub_model, torch_model, diff --git a/qai_hub_models/models/inception_v3_quantized/export.py b/qai_hub_models/models/inception_v3_quantized/export.py index cfd584a1..e6cd4f44 100644 --- a/qai_hub_models/models/inception_v3_quantized/export.py +++ b/qai_hub_models/models/inception_v3_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/inception_v3_quantized/info.yaml b/qai_hub_models/models/inception_v3_quantized/info.yaml index c3d40275..4f3446a8 100644 --- a/qai_hub_models/models/inception_v3_quantized/info.yaml +++ b/qai_hub_models/models/inception_v3_quantized/info.yaml @@ -43,3 +43,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/inception_v3_quantized/model.py b/qai_hub_models/models/inception_v3_quantized/model.py index a53c2f4f..2a74e221 100644 --- a/qai_hub_models/models/inception_v3_quantized/model.py +++ b/qai_hub_models/models/inception_v3_quantized/model.py @@ -22,7 +22,7 @@ from qai_hub_models.utils.asset_loaders import CachedWebModelAsset from qai_hub_models.utils.quantization_aimet import ( constrain_quantized_inputs_to_image_range, - tie_aimet_observer_groups, + tie_observers, ) MODEL_ID = __name__.split(".")[-2] @@ -75,7 +75,7 @@ def from_pretrained( config_file=get_default_aimet_config(), dummy_input=torch.rand(input_shape), ) - cls._tie_pre_concat_quantizers(sim) + tie_observers(sim) constrain_quantized_inputs_to_image_range(sim) if aimet_encodings: @@ -87,111 +87,3 @@ def from_pretrained( sim.model.eval() return cls(sim) - - @classmethod - def 
_tie_pre_concat_quantizers(cls, sim: QuantizationSimModel): - """ - This ties together the output quantizers prior to concatenations. This - prevents unnecessary re-quantization during the concatenation, and even - avoids fatal TFLite converter errors. - """ - - n = sim.model.net - groups = [ - [ - n.maxpool2, - n.Mixed_5b.module_avg_pool2d, - ], - [ - n.Mixed_5b.branch1x1.module_relu_5, - n.Mixed_5b.branch5x5_2.module_relu_7, - n.Mixed_5b.branch3x3dbl_3.module_relu_10, - n.Mixed_5b.branch_pool.module_relu_11, - n.Mixed_5b.module_cat, - n.Mixed_5c.module_avg_pool2d_1, - ], - [ - n.Mixed_5c.branch1x1.module_relu_12, - n.Mixed_5c.branch5x5_2.module_relu_14, - n.Mixed_5c.branch3x3dbl_3.module_relu_17, - n.Mixed_5c.branch_pool.module_relu_18, - n.Mixed_5c.module_cat_1, - n.Mixed_5d.module_avg_pool2d_2, - ], - [ - n.Mixed_5d.branch1x1.module_relu_19, - n.Mixed_5d.branch5x5_2.module_relu_21, - n.Mixed_5d.branch3x3dbl_3.module_relu_24, - n.Mixed_5d.branch_pool.module_relu_25, - n.Mixed_5d.module_cat_2, - # This group has a branch with only a max pool, - # this requires the two concat groups to merge - n.Mixed_6a.branch3x3.module_relu_26, - n.Mixed_6a.branch3x3dbl_3.module_relu_29, - n.Mixed_6a.module_max_pool2d, - n.Mixed_6a.module_cat_3, - n.Mixed_6b.module_avg_pool2d_3, - ], - [ - n.Mixed_6b.branch1x1.module_relu_30, - n.Mixed_6b.branch7x7_3.module_relu_33, - n.Mixed_6b.branch7x7dbl_5.module_relu_38, - n.Mixed_6b.branch_pool.module_relu_39, - n.Mixed_6b.module_cat_4, - n.Mixed_6c.module_avg_pool2d_4, - ], - [ - n.Mixed_6c.branch1x1.module_relu_40, - n.Mixed_6c.branch7x7_3.module_relu_43, - n.Mixed_6c.branch7x7dbl_5.module_relu_48, - n.Mixed_6c.branch_pool.module_relu_49, - n.Mixed_6c.module_cat_5, - n.Mixed_6d.module_avg_pool2d_5, - ], - [ - n.Mixed_6d.branch1x1.module_relu_50, - n.Mixed_6d.branch7x7_3.module_relu_53, - n.Mixed_6d.branch7x7dbl_5.module_relu_58, - n.Mixed_6d.branch_pool.module_relu_59, - n.Mixed_6d.module_cat_6, - n.Mixed_6e.module_avg_pool2d_6, - ], - [ - n.Mixed_6e.branch1x1.module_relu_60, - n.Mixed_6e.branch7x7_3.module_relu_63, - n.Mixed_6e.branch7x7dbl_5.module_relu_68, - n.Mixed_6e.branch_pool.module_relu_69, - n.Mixed_6e.module_cat_7, - # This group has a branch with only a max pool, - # this requires the two concat groups to merge - n.Mixed_7a.branch3x3_2.module_relu_71, - n.Mixed_7a.branch7x7x3_4.module_relu_75, - n.Mixed_7a.module_max_pool2d_1, - n.Mixed_7a.module_cat_8, - n.Mixed_7b.module_avg_pool2d_7, - ], - [ - n.Mixed_7b.branch1x1.module_relu_76, - n.Mixed_7b.branch3x3_2a.module_relu_78, - n.Mixed_7b.branch3x3_2b.module_relu_79, - n.Mixed_7b.branch3x3dbl_3a.module_relu_82, - n.Mixed_7b.branch3x3dbl_3b.module_relu_83, - n.Mixed_7b.branch_pool.module_relu_84, - n.Mixed_7b.module_cat_9, - n.Mixed_7b.module_cat_10, - n.Mixed_7b.module_cat_11, - n.Mixed_7c.module_avg_pool2d_8, - ], - [ - n.Mixed_7c.branch1x1.module_relu_85, - n.Mixed_7c.branch3x3_2a.module_relu_87, - n.Mixed_7c.branch3x3_2b.module_relu_88, - n.Mixed_7c.branch3x3dbl_3a.module_relu_91, - n.Mixed_7c.branch3x3dbl_3b.module_relu_92, - n.Mixed_7c.branch_pool.module_relu_93, - n.Mixed_7c.module_cat_12, - n.Mixed_7c.module_cat_13, - n.Mixed_7c.module_cat_14, - ], - ] - tie_aimet_observer_groups(groups) diff --git a/qai_hub_models/models/inception_v3_quantized/perf.yaml b/qai_hub_models/models/inception_v3_quantized/perf.yaml index 07c91205..a770e878 100644 --- a/qai_hub_models/models/inception_v3_quantized/perf.yaml +++ b/qai_hub_models/models/inception_v3_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - 
name: Inception-v3-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 617.0 - throughput: 1620.7455429497568 + inference_time: 615.0 + throughput: 1626.0162601626016 estimated_peak_memory_range: min: 12288 - max: 1887024 + max: 1478976 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jo5mz979p + job_id: jogkr36y5 job_status: Passed torchscript_onnx_qnn: - inference_time: 645.0 - throughput: 1550.3875968992247 + inference_time: 646.0 + throughput: 1547.9876160990711 estimated_peak_memory_range: - min: 221184 - max: 60468504 + min: 12288 + max: 165286688 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: j2p0rownp + job_id: jwgoe3d4p job_status: Passed torchscript_onnx_ort: - inference_time: 932.0 - throughput: 1072.961373390558 + inference_time: 844.0 + throughput: 1184.8341232227488 estimated_peak_memory_range: - min: 65536 - max: 32199936 + min: 12288 + max: 65222768 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 137 + layers_on_npu: 134 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 137 - job_id: jw561okyp + total_layers: 134 + job_id: jygzvrzzp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.263421Z' + timestamp: '2024-06-08T22:41:32Z' - torchscript_onnx_tflite: - inference_time: 494.0 - throughput: 2024.2914979757086 + inference_time: 486.0 + throughput: 2057.61316872428 estimated_peak_memory_range: min: 12288 - max: 64806240 + max: 67571472 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jegne14qg + job_id: jn5q9347p job_status: Passed torchscript_onnx_qnn: - inference_time: 502.0 - throughput: 1992.03187250996 + inference_time: 496.0 + throughput: 2016.1290322580646 estimated_peak_memory_range: min: 167936 - max: 54785280 + max: 54564464 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: j1p87jno5 + job_id: j1pvzv27g job_status: Passed torchscript_onnx_ort: - inference_time: 728.0 - throughput: 1373.6263736263736 + inference_time: 659.0 + throughput: 1517.4506828528072 estimated_peak_memory_range: - min: 618496 - max: 41725008 + min: 12288 + max: 43078608 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 137 + layers_on_npu: 134 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 137 - job_id: j1p3moyng + total_layers: 134 + job_id: jz5wmqyzg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.263500Z' + timestamp: '2024-06-08T22:41:33Z' - torchscript_onnx_tflite: - inference_time: 613.0 - throughput: 1631.3213703099511 + inference_time: 625.0 + throughput: 1600.0 estimated_peak_memory_range: - min: 16384 - max: 1745384 + min: 24576 + max: 1548872 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jopryxr7g + job_id: j1gle3wep job_status: Passed torchscript_onnx_qnn: inference_time: 648.0 throughput: 1543.20987654321 estimated_peak_memory_range: - min: 0 - 
max: 6304608 + min: 36864 + max: 39620504 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: jn5q24no5 + job_id: jlpe4k675 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.263550Z' + timestamp: '2024-06-08T22:41:31Z' - torchscript_onnx_tflite: - inference_time: 2343.0 - throughput: 426.8032437046522 + inference_time: 2357.0 + throughput: 424.26813746287655 estimated_peak_memory_range: - min: 16384 - max: 21927008 + min: 12288 + max: 22249744 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jep2mo1q5 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 2406.0 - throughput: 415.6275976724855 - estimated_peak_memory_range: - min: 12288 - max: 51696096 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 134 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 134 - job_id: j1glkwjmp + job_id: jw56qnovg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:41.263601Z' + timestamp: '2024-06-08T22:41:26Z' - torchscript_onnx_tflite: - inference_time: 7547.0 - throughput: 132.50298131707964 + inference_time: 7805.0 + throughput: 128.12299807815504 estimated_peak_memory_range: - min: 167936 - max: 7482520 + min: 16384 + max: 2215816 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jqpyd8llp + job_id: j1p3qeox5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:41.263629Z' + timestamp: '2024-06-08T22:41:27Z' - torchscript_onnx_qnn: - inference_time: 710.0 - throughput: 1408.4507042253522 + inference_time: 706.0 + throughput: 1416.4305949008499 estimated_peak_memory_range: - min: 446464 - max: 446464 + min: 450560 + max: 450560 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: jogky61np + job_id: j7gjke375 job_status: Passed torchscript_onnx_ort: - inference_time: 898.0 - throughput: 1113.5857461024498 + inference_time: 782.0 + throughput: 1278.772378516624 estimated_peak_memory_range: - min: 41455616 - max: 41455616 + min: 12218368 + max: 12218368 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 137 + layers_on_npu: 134 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 137 - job_id: jwgovdjk5 + total_layers: 134 + job_id: jmg99woqg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.263679Z' + timestamp: '2024-06-08T22:41:34Z' diff --git a/qai_hub_models/models/lama_dilated/README.md b/qai_hub_models/models/lama_dilated/README.md index 511bdc4a..a418710e 100644 --- a/qai_hub_models/models/lama_dilated/README.md +++ b/qai_hub_models/models/lama_dilated/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. 
## License - The license for the original implementation of LaMa-Dilated can be found [here](https://github.com/advimman/lama/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Resolution-robust Large Mask Inpainting with Fourier Convolutions](https://arxiv.org/abs/2109.07161) diff --git a/qai_hub_models/models/lama_dilated/export.py b/qai_hub_models/models/lama_dilated/export.py index 87478f48..b4b49854 100644 --- a/qai_hub_models/models/lama_dilated/export.py +++ b/qai_hub_models/models/lama_dilated/export.py @@ -190,7 +190,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/lama_dilated/perf.yaml b/qai_hub_models/models/lama_dilated/perf.yaml index d59888e4..87e48fb1 100644 --- a/qai_hub_models/models/lama_dilated/perf.yaml +++ b/qai_hub_models/models/lama_dilated/perf.yaml @@ -36,26 +36,26 @@ models: - name: LaMa-Dilated performance_metrics: - torchscript_onnx_tflite: - inference_time: 87216.0 - throughput: 11.465786094294625 + inference_time: 86343.0 + throughput: 11.581714788691613 estimated_peak_memory_range: - min: 2220032 - max: 138408472 + min: 3289088 + max: 139370192 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 347 + layers_on_npu: 344 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 347 - job_id: jygz7z1xp + total_layers: 344 + job_id: jvgd7o6kg job_status: Passed torchscript_onnx_qnn: - inference_time: 81054.0 - throughput: 12.33745404298369 + inference_time: 81307.0 + throughput: 12.299064041226462 estimated_peak_memory_range: - min: 3207168 - max: 44584112 + min: 3371008 + max: 42726616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jvgdv6jzg + job_id: j0pxey0j5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,28 +72,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.307510Z' + timestamp: '2024-06-08T22:42:12Z' - torchscript_onnx_tflite: - inference_time: 59838.0 - throughput: 16.7117884956048 + inference_time: 59391.0 + throughput: 16.837567981680728 estimated_peak_memory_range: - min: 2932736 - max: 243876640 + min: 53248 + max: 241657616 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 347 + layers_on_npu: 344 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 347 - job_id: jmg94o685 + total_layers: 344 + job_id: jz57vxoq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 56713.0 - throughput: 17.632641546030012 + inference_time: 57168.0 + throughput: 17.492303386509935 estimated_peak_memory_range: - min: 1187840 - max: 164342080 + min: 2736128 + max: 165991776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jz57doq95 + job_id: jo5mv39y5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,28 +110,28 @@ models: os_name: Android 
manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.307597Z' + timestamp: '2024-06-08T22:42:13Z' - torchscript_onnx_tflite: - inference_time: 87348.0 - throughput: 11.448459037413564 + inference_time: 85709.0 + throughput: 11.667386155479589 estimated_peak_memory_range: - min: 3272704 - max: 139025312 + min: 3477504 + max: 138753616 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 347 + layers_on_npu: 344 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 347 - job_id: jnp18or7g + total_layers: 344 + job_id: jqp4jveqp job_status: Passed torchscript_onnx_qnn: - inference_time: 81632.0 - throughput: 12.250098000784007 + inference_time: 81015.0 + throughput: 12.343393198790347 estimated_peak_memory_range: - min: 3194880 - max: 39290392 + min: 3174400 + max: 43648896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -139,7 +139,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: j0px10wlg + job_id: jopr1edvg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -148,10 +148,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.307681Z' + timestamp: '2024-06-08T22:42:16Z' - torchscript_onnx_qnn: - inference_time: 92179.0 - throughput: 10.848457891710693 + inference_time: 91919.0 + throughput: 10.879143593816295 estimated_peak_memory_range: min: 4202496 max: 4202496 @@ -162,7 +162,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jqp4wez1g + job_id: jegnr3qv5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -171,4 +171,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.307729Z' + timestamp: '2024-06-08T22:42:14Z' diff --git a/qai_hub_models/models/litehrnet/README.md b/qai_hub_models/models/litehrnet/README.md index 13fa47e0..d44fd6a6 100644 --- a/qai_hub_models/models/litehrnet/README.md +++ b/qai_hub_models/models/litehrnet/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of LiteHRNet can be found [here](https://github.com/HRNet/Lite-HRNet/blob/hrnet/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Lite-HRNet: A Lightweight High-Resolution Network](https://arxiv.org/abs/2104.06403) diff --git a/qai_hub_models/models/litehrnet/export.py b/qai_hub_models/models/litehrnet/export.py index 81e07f3a..9dba5402 100644 --- a/qai_hub_models/models/litehrnet/export.py +++ b/qai_hub_models/models/litehrnet/export.py @@ -172,7 +172,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -201,7 +201,12 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) + parser = export_parser( + model_cls=Model, + supports_qnn=False, + supports_ort=False, + supports_precompiled_ort=False, + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/litehrnet/perf.yaml b/qai_hub_models/models/litehrnet/perf.yaml index 87dcc012..64bcf8e7 100644 --- a/qai_hub_models/models/litehrnet/perf.yaml +++ b/qai_hub_models/models/litehrnet/perf.yaml @@ -36,11 +36,11 @@ models: - name: LiteHRNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 11098.0 - throughput: 90.10632546404757 + inference_time: 11261.0 + throughput: 88.80206020779683 estimated_peak_memory_range: - min: 6336512 - max: 9866728 + min: 6529024 + max: 13390128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,7 +48,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 10 total_layers: 1236 - job_id: jqpyd89lp + job_id: jogkr30y5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -57,13 +57,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.334245Z' + timestamp: '2024-06-08T22:42:54Z' - torchscript_onnx_tflite: - inference_time: 7918.0 - throughput: 126.2945188178833 + inference_time: 7629.0 + throughput: 131.07877834578582 estimated_peak_memory_range: - min: 16384 - max: 74961808 + min: 6545408 + max: 86932832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -71,7 +71,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 10 total_layers: 1236 - job_id: j1p87jlo5 + job_id: jn5q9317p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -80,13 +80,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.334395Z' + timestamp: '2024-06-08T22:42:55Z' - torchscript_onnx_tflite: - inference_time: 11183.0 - throughput: 89.42144326209424 + inference_time: 11181.0 + throughput: 89.43743851176103 estimated_peak_memory_range: - min: 6533120 - max: 9097208 + min: 6561792 + max: 18010528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -94,7 +94,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 10 total_layers: 1236 - job_id: jogky6jnp + job_id: j1gle38ep job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -103,12 +103,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.334538Z' - - reference_device_info: - name: 
Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows - manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.334546Z' + timestamp: '2024-06-08T22:42:56Z' diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md b/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md index 97d5d37b..54e3e3f8 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md @@ -70,7 +70,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Llama-v2-7B-Chat can be found [here](https://github.com/facebookresearch/llama/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/facebookresearch/llama/blob/main/LICENSE) ## References * [LLaMA: Open and Efficient Foundation Language Models](https://arxiv.org/abs/2302.13971) diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/model.py b/qai_hub_models/models/llama_v2_7b_chat_quantized/model.py index 72ea13aa..5dc3fda2 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/model.py +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/model.py @@ -256,7 +256,7 @@ def _get_past_key_names(start: int = 0, end: int = 8, suffix=""): def _get_output_names_from_split(split_part: int = 1): layer_start, layer_end = _get_hidden_layer_range_from_split(split_part=split_part) - output_list = [f"layers_{layer_end-1}_add_out_0"] + output_list = [f"layers_{layer_end - 1}_add_out_0"] output_list += _get_past_key_names(layer_start, layer_end, suffix="_out") return output_list @@ -414,7 +414,7 @@ def _get_llama_model_with_split( os.path.join( AIMET_ENCODINGS_PREFIX, model_encoding_tag, - f"llama_{model_encoding_tag}_sha_{split_part-1}.encodings", + f"llama_{model_encoding_tag}_sha_{split_part - 1}.encodings", ) ) aimet_encodings = str( diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml b/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml index 19c64c6c..b8c5ad10 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml @@ -24,6 +24,29 @@ models: primary_compute_unit: NPU job_id: "null" job_status: Passed + - reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-05-23T00:34:02.549319Z' + torchscript_onnx_qnn: + inference_time: 118139 + throughput: 8.46 + estimated_peak_memory_range: + min: 68124672 + max: 68124672 + layer_info: + layers_on_npu: 34842 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 34842 + precision: uint16 + primary_compute_unit: NPU + job_id: "null" + job_status: Passed - name: Llama-PromptProcessor-Quantized performance_metrics: - reference_device_info: @@ -49,15 +72,40 @@ models: primary_compute_unit: NPU job_id: "null" job_status: Passed + - reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-05-23T00:34:02.549319Z' + torchscript_onnx_qnn: + inference_time: 2302575 + throughput: 445.21 + estimated_peak_memory_range: + min: 10788864 + max: 10788864 + layer_info: + layers_on_npu: 31766 + 
layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 31766 + precision: uint16 + primary_compute_unit: NPU + job_id: "null" + job_status: Passed aggregated: supported_devices: - Samsung Galaxy S23 Ultra - Samsung Galaxy S24 + - Snapdragon X Elite CRD supported_oses: - Android supported_chipsets: - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 + - Snapdragon® X Elite performance_metrics: - reference_device_info: name: Samsung Galaxy S23 Ultra diff --git a/qai_hub_models/models/mediapipe_face/README.md b/qai_hub_models/models/mediapipe_face/README.md index 1a280d17..a565c33d 100644 --- a/qai_hub_models/models/mediapipe_face/README.md +++ b/qai_hub_models/models/mediapipe_face/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MediaPipe-Face-Detection can be found [here](https://github.com/zmurez/MediaPipePyTorch/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs](https://arxiv.org/abs/1907.05047) diff --git a/qai_hub_models/models/mediapipe_face/export.py b/qai_hub_models/models/mediapipe_face/export.py index 67ca40df..adb92a20 100644 --- a/qai_hub_models/models/mediapipe_face/export.py +++ b/qai_hub_models/models/mediapipe_face/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mediapipe_face/perf.yaml b/qai_hub_models/models/mediapipe_face/perf.yaml index bf8dcabd..ed6dfc34 100644 --- a/qai_hub_models/models/mediapipe_face/perf.yaml +++ b/qai_hub_models/models/mediapipe_face/perf.yaml @@ -36,11 +36,11 @@ models: - name: MediaPipeFaceDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 779.0 - throughput: 1283.6970474967907 + inference_time: 781.0 + throughput: 1280.4097311139565 estimated_peak_memory_range: - min: 12288 - max: 3969936 + min: 90112 + max: 2155184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: j1pvw23rg + job_id: j7gjkey75 job_status: Passed torchscript_onnx_qnn: - inference_time: 839.0 - throughput: 1191.8951132300358 + inference_time: 835.0 + throughput: 1197.6047904191616 estimated_peak_memory_range: - min: 20480 - max: 5908152 + min: 16384 + max: 101864120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jnp18o07g + job_id: jvgd7o4kg job_status: Passed torchscript_onnx_ort: - inference_time: 1003.0 - throughput: 997.0089730807578 + inference_time: 986.0 + throughput: 1014.1987829614604 estimated_peak_memory_range: - min: 2125824 - max: 11100896 + min: 552960 + max: 8114576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: j0px10v8g + job_id: jqpyv6yrp job_status: Passed 
reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.352382Z' + timestamp: '2024-06-08T22:45:23Z' - torchscript_onnx_tflite: - inference_time: 575.0 - throughput: 1739.1304347826087 + inference_time: 543.0 + throughput: 1841.6206261510129 estimated_peak_memory_range: - min: 12288 - max: 29783776 + min: 16384 + max: 31618960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jlpev69v5 + job_id: jygzvryzp job_status: Passed torchscript_onnx_qnn: inference_time: 593.0 throughput: 1686.3406408094436 estimated_peak_memory_range: - min: 12288 - max: 48781760 + min: 802816 + max: 49388544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jz5w9yo4p + job_id: jqp4jv4qp job_status: Passed torchscript_onnx_ort: - inference_time: 734.0 - throughput: 1362.3978201634877 + inference_time: 706.0 + throughput: 1416.4305949008499 estimated_peak_memory_range: - min: 12288 - max: 23671680 + min: 548864 + max: 22898592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jegne12jg + job_id: j1p8wzkzp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.352462Z' + timestamp: '2024-06-08T22:45:25Z' - torchscript_onnx_tflite: - inference_time: 775.0 - throughput: 1290.3225806451612 + inference_time: 779.0 + throughput: 1283.6970474967907 estimated_peak_memory_range: - min: 32768 - max: 1633176 + min: 12288 + max: 1532120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jz5w9yomp + job_id: jmg99w2qg job_status: Passed torchscript_onnx_qnn: - inference_time: 842.0 - throughput: 1187.648456057007 + inference_time: 835.0 + throughput: 1197.6047904191616 estimated_peak_memory_range: - min: 819200 - max: 7053256 + min: 806912 + max: 77885504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jz57dozn5 + job_id: jopr1e7vg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.352513Z' + timestamp: '2024-06-08T22:45:21Z' - torchscript_onnx_qnn: - inference_time: 931.0 - throughput: 1074.1138560687432 + inference_time: 928.0 + throughput: 1077.5862068965516 estimated_peak_memory_range: min: 786432 max: 786432 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jnp18o0ng + job_id: jo5mv3ky5 job_status: Passed torchscript_onnx_ort: - inference_time: 999.0 - throughput: 1001.001001001001 + inference_time: 997.0 + throughput: 1003.0090270812437 estimated_peak_memory_range: - min: 3235840 - max: 3235840 + min: 5971968 + max: 5971968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jep2mo865 + job_id: jn5q93d7p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,15 +216,15 @@ models: os_name: Windows 
manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.352570Z' + timestamp: '2024-06-08T22:45:27Z' - name: MediaPipeFaceLandmarkDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 320.0 - throughput: 3125.0 + inference_time: 318.0 + throughput: 3144.6540880503144 estimated_peak_memory_range: - min: 20480 - max: 1923360 + min: 12288 + max: 2130328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: j7gjl3xep + job_id: jlpe4kx75 job_status: Passed torchscript_onnx_qnn: - inference_time: 384.0 - throughput: 2604.1666666666665 + inference_time: 391.0 + throughput: 2557.544757033248 estimated_peak_memory_range: - min: 462848 - max: 3777488 + min: 131072 + max: 98992544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -247,14 +247,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jvgdv6wzg + job_id: jz57vxnq5 job_status: Passed torchscript_onnx_ort: - inference_time: 491.0 - throughput: 2036.6598778004072 + inference_time: 532.0 + throughput: 1879.6992481203008 estimated_peak_memory_range: - min: 16384 - max: 40070192 + min: 12288 + max: 84060104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -262,7 +262,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jo5mz9r7p + job_id: j2p0elx25 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -271,13 +271,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.352636Z' + timestamp: '2024-06-08T22:45:24Z' - torchscript_onnx_tflite: - inference_time: 247.0 - throughput: 4048.582995951417 + inference_time: 224.0 + throughput: 4464.285714285715 estimated_peak_memory_range: - min: 12288 - max: 26073488 + min: 16384 + max: 27155600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,14 +285,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: jygz7zexp + job_id: jz5wmqzzg job_status: Passed torchscript_onnx_qnn: - inference_time: 281.0 - throughput: 3558.7188612099644 + inference_time: 283.0 + throughput: 3533.5689045936397 estimated_peak_memory_range: - min: 12288 - max: 37232720 + min: 458752 + max: 40876896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -300,14 +300,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jmg94ovm5 + job_id: j0pxeyrj5 job_status: Passed torchscript_onnx_ort: - inference_time: 353.0 - throughput: 2832.8611898016998 + inference_time: 375.0 + throughput: 2666.6666666666665 estimated_peak_memory_range: min: 12288 - max: 22272016 + max: 19616240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -315,7 +315,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jopryxkkg + job_id: jogkr3ky5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -324,13 +324,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.352701Z' + timestamp: '2024-06-08T22:45:26Z' - torchscript_onnx_tflite: - inference_time: 311.0 - throughput: 3215.434083601286 + inference_time: 309.0 + throughput: 3236.2459546925566 estimated_peak_memory_range: - min: 24576 - max: 4438632 + min: 12288 + max: 1641680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -338,14 +338,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: jmg94ov85 + job_id: jnp1qe1kg job_status: Passed 
torchscript_onnx_qnn: - inference_time: 386.0 - throughput: 2590.6735751295337 + inference_time: 395.0 + throughput: 2531.6455696202534 estimated_peak_memory_range: - min: 458752 - max: 3741368 + min: 290816 + max: 8822944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -353,7 +353,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jqp4weq2g + job_id: jep23lzxg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -362,10 +362,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.352746Z' + timestamp: '2024-06-08T22:45:22Z' - torchscript_onnx_qnn: - inference_time: 520.0 - throughput: 1923.076923076923 + inference_time: 497.0 + throughput: 2012.0724346076458 estimated_peak_memory_range: min: 442368 max: 442368 @@ -376,14 +376,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jvgdv6w6g + job_id: jegnr3wv5 job_status: Passed torchscript_onnx_ort: - inference_time: 526.0 - throughput: 1901.1406844106464 + inference_time: 521.0 + throughput: 1919.3857965451057 estimated_peak_memory_range: - min: 4198400 - max: 4198400 + min: 5312512 + max: 5312512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -391,7 +391,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jqpyd8e0p + job_id: j1gle3qep job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -400,4 +400,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.352791Z' + timestamp: '2024-06-08T22:45:27Z' diff --git a/qai_hub_models/models/mediapipe_hand/README.md b/qai_hub_models/models/mediapipe_hand/README.md index 8e327a5a..0e49e035 100644 --- a/qai_hub_models/models/mediapipe_hand/README.md +++ b/qai_hub_models/models/mediapipe_hand/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MediaPipe-Hand-Detection can be found [here](https://github.com/zmurez/MediaPipePyTorch/blob/master/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [MediaPipe Hands: On-device Real-time Hand Tracking](https://arxiv.org/abs/2006.10214) diff --git a/qai_hub_models/models/mediapipe_hand/export.py b/qai_hub_models/models/mediapipe_hand/export.py index df8302cd..0100ddad 100644 --- a/qai_hub_models/models/mediapipe_hand/export.py +++ b/qai_hub_models/models/mediapipe_hand/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mediapipe_hand/perf.yaml b/qai_hub_models/models/mediapipe_hand/perf.yaml index 53e99545..67190c80 100644 --- a/qai_hub_models/models/mediapipe_hand/perf.yaml +++ b/qai_hub_models/models/mediapipe_hand/perf.yaml @@ -36,26 +36,26 @@ models: - name: MediaPipeHandDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 955.0 - throughput: 1047.1204188481674 + inference_time: 2260.0 + throughput: 442.4778761061947 estimated_peak_memory_range: min: 12288 - max: 2084224 + max: 11649504 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 152 + layers_on_npu: 148 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 152 - job_id: jn5q24ke5 + layers_on_cpu: 2 + total_layers: 150 + job_id: jwgoe394p job_status: Passed torchscript_onnx_qnn: - inference_time: 1012.0 - throughput: 988.1422924901186 + inference_time: 1017.0 + throughput: 983.284169124877 estimated_peak_memory_range: - min: 16384 - max: 10188288 + min: 20480 + max: 21650176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jlpev6185 + job_id: jmg99wqqg job_status: Passed torchscript_onnx_ort: - inference_time: 1202.0 - throughput: 831.9467554076539 + inference_time: 1164.0 + throughput: 859.106529209622 estimated_peak_memory_range: - min: 40960 - max: 16016072 + min: 12288 + max: 18412096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jegne1yjg + job_id: jqp4jv6lp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,28 +87,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.415401Z' + timestamp: '2024-06-08T22:46:02Z' - torchscript_onnx_tflite: - inference_time: 703.0 - throughput: 1422.475106685633 + inference_time: 1902.0 + throughput: 525.7623554153523 estimated_peak_memory_range: min: 12288 - max: 54231920 + max: 50595712 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 152 + layers_on_npu: 148 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 152 - job_id: jw561ojnp + layers_on_cpu: 2 + total_layers: 150 + job_id: j7gjke875 job_status: Passed torchscript_onnx_qnn: - inference_time: 724.0 - throughput: 1381.2154696132598 + inference_time: 722.0 + throughput: 1385.0415512465374 estimated_peak_memory_range: min: 802816 - max: 60616784 + max: 60773680 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jz5w9yv4p + job_id: jvgd7omkg job_status: Passed torchscript_onnx_ort: - inference_time: 846.0 - throughput: 1182.033096926714 + inference_time: 837.0 + throughput: 1194.7431302270013 estimated_peak_memory_range: - min: 425984 - max: 36661744 + min: 323584 + max: 36752192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jqpyd8w0p + job_id: jo5mv31q5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,28 +140,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.415495Z' + timestamp: '2024-06-08T22:46:04Z' - torchscript_onnx_tflite: - inference_time: 954.0 - throughput: 1048.2180293501049 + inference_time: 2331.0 + throughput: 429.000429000429 estimated_peak_memory_range: - min: 16384 - max: 4494792 + min: 36864 + max: 2444200 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 152 + layers_on_npu: 148 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 152 - job_id: jwgovd015 + layers_on_cpu: 2 + total_layers: 150 + job_id: jygzvr0zp job_status: Passed torchscript_onnx_qnn: - inference_time: 1013.0 - throughput: 987.1668311944719 + inference_time: 1015.0 + throughput: 985.2216748768473 estimated_peak_memory_range: - min: 12288 - max: 11646912 + min: 806912 + max: 10668872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: j0px10j8g + job_id: jvgd7omlg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.415555Z' + timestamp: '2024-06-08T22:46:01Z' - torchscript_onnx_qnn: - inference_time: 1058.0 - throughput: 945.179584120983 + inference_time: 1036.0 + throughput: 965.2509652509652 estimated_peak_memory_range: min: 786432 max: 786432 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jvgdv696g + job_id: jmg99wqvg job_status: Passed torchscript_onnx_ort: - inference_time: 1250.0 - throughput: 800.0 + inference_time: 1182.0 + throughput: 846.0236886632825 estimated_peak_memory_range: - min: 905216 - max: 905216 + min: 704512 + max: 704512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: j1p87jvq5 + job_id: jopr1emeg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,7 +216,7 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.415621Z' + timestamp: '2024-06-08T22:46:06Z' - name: MediaPipeHandLandmarkDetector performance_metrics: - torchscript_onnx_tflite: @@ -224,7 +224,7 @@ models: throughput: 829.8755186721992 estimated_peak_memory_range: min: 12288 - max: 1536064 + max: 2551752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 159 - job_id: j1glkwz2p + job_id: j1pvzvn7g job_status: Passed torchscript_onnx_qnn: inference_time: 1299.0 throughput: 769.8229407236336 estimated_peak_memory_range: - min: 806912 - max: 9217032 + min: 802816 + max: 8940712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -247,14 +247,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 210 - job_id: jygz7z94p + job_id: jnp1qemkg job_status: Passed torchscript_onnx_ort: - inference_time: 1591.0 - throughput: 628.5355122564425 + inference_time: 1521.0 + throughput: 657.4621959237344 estimated_peak_memory_range: - min: 28672 - max: 19720496 + min: 12288 + max: 143178688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -262,7 +262,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: jopryxqkg + job_id: j0pxey895 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -271,13 +271,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.415714Z' + timestamp: '2024-06-08T22:46:03Z' - torchscript_onnx_tflite: - inference_time: 896.0 - throughput: 1116.0714285714287 + inference_time: 903.0 + throughput: 1107.4197120708748 estimated_peak_memory_range: min: 12288 - max: 57199744 + max: 59093296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,14 +285,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 159 - job_id: j1p3mo3mg + job_id: jlpe4kn75 job_status: Passed torchscript_onnx_qnn: - inference_time: 966.0 - throughput: 1035.1966873706003 + inference_time: 962.0 + throughput: 1039.5010395010395 estimated_peak_memory_range: min: 802816 - max: 65744144 + max: 66542112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -300,14 +300,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 210 - job_id: jnp18olng + job_id: jz5wmqrjg job_status: Passed torchscript_onnx_ort: - inference_time: 1136.0 - throughput: 880.2816901408451 + inference_time: 1121.0 + throughput: 892.0606601248885 estimated_peak_memory_range: - min: 724992 - max: 31554048 + min: 802816 + max: 30698880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -315,7 +315,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: j2p0ro70p + job_id: jegnr3dm5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -324,13 +324,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.415805Z' + timestamp: '2024-06-08T22:46:05Z' - torchscript_onnx_tflite: - inference_time: 1200.0 - throughput: 833.3333333333334 + inference_time: 1196.0 + throughput: 836.1204013377926 estimated_peak_memory_range: min: 28672 - max: 1701744 + max: 1643304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -338,14 +338,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 159 - job_id: j1pvw2ozg + job_id: jz5wmqrzg job_status: Passed torchscript_onnx_qnn: - inference_time: 1302.0 - throughput: 768.0491551459294 + inference_time: 1318.0 + throughput: 758.7253414264036 estimated_peak_memory_range: - min: 819200 - max: 42331408 + min: 294912 + max: 52198264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -353,7 +353,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 210 - job_id: jo5mz927p + job_id: jz57vx1r5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -362,13 +362,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.415867Z' + timestamp: '2024-06-08T22:46:01Z' - torchscript_onnx_qnn: - inference_time: 1468.0 - throughput: 681.1989100817439 + inference_time: 1513.0 + throughput: 660.9385327164574 estimated_peak_memory_range: - min: 786432 - max: 786432 + min: 1150976 + max: 1150976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -376,14 +376,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 209 - job_id: jz57down5 + job_id: jnp1qemlg job_status: Passed torchscript_onnx_ort: - inference_time: 1556.0 - throughput: 642.6735218508998 + inference_time: 1551.0 + throughput: 644.7453255963894 estimated_peak_memory_range: - min: 18542592 - max: 18542592 + min: 20062208 + max: 20062208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -391,7 +391,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: jn5q24oe5 + job_id: jep23lqmg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -400,4 +400,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.415965Z' + timestamp: '2024-06-08T22:46:06Z' diff --git a/qai_hub_models/models/mediapipe_pose/README.md b/qai_hub_models/models/mediapipe_pose/README.md index 97b007ee..a63082b8 100644 --- a/qai_hub_models/models/mediapipe_pose/README.md +++ b/qai_hub_models/models/mediapipe_pose/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MediaPipe-Pose-Estimation can be found [here](https://github.com/zmurez/MediaPipePyTorch/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [BlazePose: On-device Real-time Body Pose tracking](https://arxiv.org/abs/2006.10204) diff --git a/qai_hub_models/models/mediapipe_pose/export.py b/qai_hub_models/models/mediapipe_pose/export.py index 7bfbb700..4fd3fb91 100644 --- a/qai_hub_models/models/mediapipe_pose/export.py +++ b/qai_hub_models/models/mediapipe_pose/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mediapipe_pose/perf.yaml b/qai_hub_models/models/mediapipe_pose/perf.yaml index 6bce9977..5bcb6fa5 100644 --- a/qai_hub_models/models/mediapipe_pose/perf.yaml +++ b/qai_hub_models/models/mediapipe_pose/perf.yaml @@ -36,11 +36,11 @@ models: - name: MediaPipePoseDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 824.0 - throughput: 1213.5922330097087 + inference_time: 850.0 + throughput: 1176.4705882352941 estimated_peak_memory_range: - min: 20480 - max: 1577288 + min: 32768 + max: 1863416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jwgovdq15 + job_id: j1p8wzd8p job_status: Passed torchscript_onnx_qnn: - inference_time: 876.0 - throughput: 1141.552511415525 + inference_time: 880.0 + throughput: 1136.3636363636363 estimated_peak_memory_range: - min: 212992 - max: 5551744 + min: 2273280 + max: 7352768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jmg94oem5 + job_id: jwgoe37dp job_status: Passed torchscript_onnx_ort: - inference_time: 1024.0 - throughput: 976.5625 + inference_time: 1001.0 + throughput: 999.000999000999 estimated_peak_memory_range: - min: 
28672 - max: 9153824 + min: 471040 + max: 10697640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jopryxjkg + job_id: jvgd7oylg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.478689Z' + timestamp: '2024-06-08T22:46:43Z' - torchscript_onnx_tflite: - inference_time: 595.0 - throughput: 1680.672268907563 + inference_time: 621.0 + throughput: 1610.3059581320451 estimated_peak_memory_range: - min: 25280512 - max: 65999520 + min: 61440 + max: 42407216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: j7gjl341p + job_id: jn5q93xmp job_status: Passed torchscript_onnx_qnn: - inference_time: 634.0 - throughput: 1577.2870662460568 + inference_time: 633.0 + throughput: 1579.778830963665 estimated_peak_memory_range: - min: 0 - max: 42973232 + min: 208896 + max: 48822992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jvgdv6l6g + job_id: j7gjke685 job_status: Passed torchscript_onnx_ort: - inference_time: 720.0 - throughput: 1388.888888888889 + inference_time: 772.0 + throughput: 1295.3367875647668 estimated_peak_memory_range: min: 212992 - max: 30255216 + max: 32138320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jqpyd800p + job_id: jqp4jv7lp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.478764Z' + timestamp: '2024-06-08T22:46:44Z' - torchscript_onnx_tflite: - inference_time: 851.0 - throughput: 1175.0881316098707 + inference_time: 830.0 + throughput: 1204.8192771084337 estimated_peak_memory_range: min: 20480 - max: 1974952 + max: 1868176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jygz7zk4p + job_id: jw56qn97g job_status: Passed torchscript_onnx_qnn: - inference_time: 877.0 - throughput: 1140.2508551881415 + inference_time: 888.0 + throughput: 1126.126126126126 estimated_peak_memory_range: - min: 225280 - max: 15717856 + min: 16384 + max: 128786224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jo5mz9y7p + job_id: jmg99w7vg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.478813Z' + timestamp: '2024-06-08T22:46:41Z' - torchscript_onnx_qnn: - inference_time: 1047.0 - throughput: 955.1098376313277 + inference_time: 1086.0 + throughput: 920.8103130755064 estimated_peak_memory_range: - min: 507904 - max: 507904 + min: 1765376 + max: 1765376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jqp4we02g + job_id: jygzvrq6p job_status: Passed torchscript_onnx_ort: - inference_time: 1055.0 - throughput: 947.8672985781991 + inference_time: 1038.0 + throughput: 963.3911368015414 estimated_peak_memory_range: - 
min: 909312 - max: 909312 + min: 3256320 + max: 3256320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: j1p87ryq5 + job_id: jo5mv37q5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,15 +216,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.478866Z' + timestamp: '2024-06-08T22:46:46Z' - name: MediaPipePoseLandmarkDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 1281.0 - throughput: 780.64012490242 + inference_time: 1205.0 + throughput: 829.8755186721992 estimated_peak_memory_range: - min: 12288 - max: 2189288 + min: 200704 + max: 2517320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 230 - job_id: j1pvw2xzg + job_id: jogkr3wo5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1294.0 - throughput: 772.7975270479135 + inference_time: 1306.0 + throughput: 765.6967840735069 estimated_peak_memory_range: - min: 12288 - max: 14061248 + min: 16384 + max: 13996512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -247,14 +247,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 306 - job_id: jnp18oxng + job_id: j1pvzvymg job_status: Passed torchscript_onnx_ort: - inference_time: 1681.0 - throughput: 594.883997620464 + inference_time: 1647.0 + throughput: 607.1645415907711 estimated_peak_memory_range: - min: 81920 - max: 24691208 + min: 12288 + max: 25082496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -262,7 +262,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 304 - job_id: jep2mon65 + job_id: jz57vxmr5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -271,13 +271,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.478982Z' + timestamp: '2024-06-08T22:46:43Z' - torchscript_onnx_tflite: - inference_time: 885.0 - throughput: 1129.9435028248588 + inference_time: 864.0 + throughput: 1157.4074074074074 estimated_peak_memory_range: - min: 16384 - max: 88333840 + min: 12288 + max: 90560000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,14 +285,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 230 - job_id: jlpev6385 + job_id: j1gle39lp job_status: Passed torchscript_onnx_qnn: - inference_time: 939.0 - throughput: 1064.9627263045793 + inference_time: 944.0 + throughput: 1059.322033898305 estimated_peak_memory_range: min: 802816 - max: 89518496 + max: 88829488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -300,14 +300,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 306 - job_id: jz57do3n5 + job_id: jlpe4k005 job_status: Passed torchscript_onnx_ort: - inference_time: 1187.0 - throughput: 842.4599831508003 + inference_time: 1101.0 + throughput: 908.2652134423251 estimated_peak_memory_range: - min: 512000 - max: 41583520 + min: 802816 + max: 39260784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -315,7 +315,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 304 - job_id: j2p0r900p + job_id: j0pxeyq95 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -324,13 +324,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.479097Z' + timestamp: '2024-06-08T22:46:45Z' - torchscript_onnx_tflite: - inference_time: 1229.0 - throughput: 
813.6696501220505 + inference_time: 1214.0 + throughput: 823.7232289950576 estimated_peak_memory_range: - min: 16384 - max: 2829328 + min: 24576 + max: 2611056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -338,14 +338,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 230 - job_id: jz5w9yn4p + job_id: j1p3qelz5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1303.0 - throughput: 767.4597083653108 + inference_time: 1308.0 + throughput: 764.525993883792 estimated_peak_memory_range: - min: 802816 - max: 11490360 + min: 434176 + max: 15229872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -353,7 +353,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 306 - job_id: jegne18jg + job_id: jnp1qeklg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -362,10 +362,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.479173Z' + timestamp: '2024-06-08T22:46:41Z' - torchscript_onnx_qnn: - inference_time: 1494.0 - throughput: 669.3440428380187 + inference_time: 1463.0 + throughput: 683.526999316473 estimated_peak_memory_range: min: 786432 max: 786432 @@ -376,14 +376,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 305 - job_id: j0px1028g + job_id: jz5wmq0jg job_status: Passed torchscript_onnx_ort: - inference_time: 1596.0 - throughput: 626.5664160401003 + inference_time: 1886.0 + throughput: 530.2226935312831 estimated_peak_memory_range: - min: 7958528 - max: 7958528 + min: 19697664 + max: 19697664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -391,7 +391,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 304 - job_id: jogky0xvp + job_id: jegnr34m5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -400,4 +400,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.479256Z' + timestamp: '2024-06-08T22:46:46Z' diff --git a/qai_hub_models/models/mediapipe_selfie/README.md b/qai_hub_models/models/mediapipe_selfie/README.md index fd842e56..ec08249a 100644 --- a/qai_hub_models/models/mediapipe_selfie/README.md +++ b/qai_hub_models/models/mediapipe_selfie/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MediaPipe-Selfie-Segmentation can be found [here](https://github.com/google/mediapipe/blob/master/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Image segmentation guide](https://developers.google.com/mediapipe/solutions/vision/image_segmenter/) diff --git a/qai_hub_models/models/mediapipe_selfie/export.py b/qai_hub_models/models/mediapipe_selfie/export.py index f137ad46..374affc3 100644 --- a/qai_hub_models/models/mediapipe_selfie/export.py +++ b/qai_hub_models/models/mediapipe_selfie/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mediapipe_selfie/perf.yaml b/qai_hub_models/models/mediapipe_selfie/perf.yaml index 913ec699..7f0d5fe1 100644 --- a/qai_hub_models/models/mediapipe_selfie/perf.yaml +++ b/qai_hub_models/models/mediapipe_selfie/perf.yaml @@ -36,11 +36,11 @@ models: - name: MediaPipe-Selfie-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 810.0 - throughput: 1234.567901234568 + inference_time: 806.0 + throughput: 1240.6947890818858 estimated_peak_memory_range: - min: 24576 - max: 1664768 + min: 12288 + max: 2385600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jw561m4np + job_id: jqpyv6l4p job_status: Passed torchscript_onnx_qnn: - inference_time: 792.0 - throughput: 1262.6262626262626 + inference_time: 784.0 + throughput: 1275.5102040816328 estimated_peak_memory_range: - min: 28672 - max: 20649280 + min: 2240512 + max: 96205696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: j7gjlyn1p + job_id: jogkr31o5 job_status: Passed torchscript_onnx_ort: - inference_time: 1339.0 - throughput: 746.8259895444362 + inference_time: 1346.0 + throughput: 742.9420505200594 estimated_peak_memory_range: - min: 684032 - max: 56261672 + min: 786432 + max: 76785816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jmg942nm5 + job_id: j1p3qedz5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.579849Z' + timestamp: '2024-06-08T22:47:12Z' - torchscript_onnx_tflite: - inference_time: 523.0 - throughput: 1912.0458891013384 + inference_time: 537.0 + throughput: 1862.1973929236499 estimated_peak_memory_range: - min: 16384 - max: 23767456 + min: 12288 + max: 24988016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jwgovw615 + job_id: j2p0elwe5 job_status: Passed torchscript_onnx_qnn: inference_time: 512.0 throughput: 1953.125 estimated_peak_memory_range: min: 176128 - max: 42148896 + max: 45965632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jlpevxm85 
+ job_id: jn5q93nmp job_status: Passed torchscript_onnx_ort: - inference_time: 912.0 - throughput: 1096.4912280701753 + inference_time: 904.0 + throughput: 1106.1946902654868 estimated_peak_memory_range: min: 12288 - max: 19240112 + max: 20791344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jnp181zng + job_id: jwgoe3xdp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.579933Z' + timestamp: '2024-06-08T22:47:13Z' - torchscript_onnx_tflite: - inference_time: 810.0 - throughput: 1234.567901234568 + inference_time: 803.0 + throughput: 1245.3300124533 estimated_peak_memory_range: min: 24576 - max: 1962504 + max: 1606304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: j1pvwmkzg + job_id: j1p8wzn8p job_status: Passed torchscript_onnx_qnn: - inference_time: 783.0 - throughput: 1277.139208173691 + inference_time: 782.0 + throughput: 1278.772378516624 estimated_peak_memory_range: - min: 806912 - max: 8305720 + min: 24576 + max: 12402272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jz5w9z64p + job_id: jw56qnx7g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.580005Z' + timestamp: '2024-06-08T22:47:11Z' - torchscript_onnx_qnn: - inference_time: 1023.0 - throughput: 977.5171065493646 + inference_time: 920.0 + throughput: 1086.9565217391305 estimated_peak_memory_range: min: 786432 max: 786432 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jygz7yd4p + job_id: j1gle3dlp job_status: Passed torchscript_onnx_ort: - inference_time: 1373.0 - throughput: 728.3321194464676 + inference_time: 1362.0 + throughput: 734.2143906020558 estimated_peak_memory_range: - min: 1335296 - max: 1335296 + min: 2674688 + max: 2674688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jvgdv416g + job_id: j1pvzv8mg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.580058Z' + timestamp: '2024-06-08T22:47:14Z' diff --git a/qai_hub_models/models/midas/README.md b/qai_hub_models/models/midas/README.md index dbf100cc..8eed4994 100644 --- a/qai_hub_models/models/midas/README.md +++ b/qai_hub_models/models/midas/README.md @@ -18,6 +18,11 @@ a hosted Qualcomm® device. ## Example & Usage +Install the package via pip: +```bash +pip install "qai_hub_models[midas]" +``` + Once installed, run the following simple CLI demo: @@ -43,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Midas-V2 can be found [here](https://github.com/isl-org/MiDaS/blob/master/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer](https://arxiv.org/abs/1907.01341v3) diff --git a/qai_hub_models/models/midas/export.py b/qai_hub_models/models/midas/export.py index 61e5504a..adcb9fbe 100644 --- a/qai_hub_models/models/midas/export.py +++ b/qai_hub_models/models/midas/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/midas/model.py b/qai_hub_models/models/midas/model.py index 3f6b1d53..669ecdd0 100644 --- a/qai_hub_models/models/midas/model.py +++ b/qai_hub_models/models/midas/model.py @@ -4,15 +4,35 @@ # --------------------------------------------------------------------- from __future__ import annotations +import sys + import torch +from qai_hub_models.utils.asset_loaders import ( + CachedWebModelAsset, + SourceAsRoot, + find_replace_in_repo, + load_torch, + tmp_os_env, + wipe_sys_modules, +) from qai_hub_models.utils.base_model import BaseModel from qai_hub_models.utils.image_processing import normalize_image_torchvision from qai_hub_models.utils.input_spec import InputSpec MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 1 -DEFAULT_WEIGHTS = "MiDaS_small" +MODEL_ASSET_VERSION = 2 + +SOURCE_REPO = "https://github.com/isl-org/MiDaS/" +REPO_COMMIT = "bdc4ed64c095e026dc0a2f17cabb14d58263decb" +DEFAULT_WEIGHTS = CachedWebModelAsset( + "https://github.com/isl-org/MiDaS/releases/download/v2_1/midas_v21_small_256.pt", + MODEL_ID, + MODEL_ASSET_VERSION, + "midas_v21_small_256.pt", +) +DEFAULT_HEIGHT = 256 +DEFAULT_WIDTH = 256 class Midas(BaseModel): @@ -21,21 +41,76 @@ class Midas(BaseModel): def __init__( self, model: torch.nn.Module, + height: int = DEFAULT_HEIGHT, + width: int = DEFAULT_WIDTH, normalize_input: bool = True, ) -> None: super().__init__() self.model = model self.normalize_input = normalize_input + self.height = height + self.width = width @classmethod - def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> Midas: - model = torch.hub.load("intel-isl/MiDaS", weights).eval() + def from_pretrained( + cls, + weights: str = DEFAULT_WEIGHTS, + height: int = DEFAULT_HEIGHT, + width: int = DEFAULT_WIDTH, + ) -> Midas: + with SourceAsRoot( + SOURCE_REPO, + REPO_COMMIT, + MODEL_ID, + MODEL_ASSET_VERSION, + keep_sys_modules=True, + ) as repo_root: + # Temporarily set torch home to the local repo so modules get cloned + # locally and we can modify their code. + with tmp_os_env( + {"TORCH_HOME": repo_root, "height": str(height), "width": str(width)} + ): + # Load the dependent module first to ensure the code gets cloned. + # Then wipe the cached modules and make necessary code changes. 
+        torch.hub.load(
+            "rwightman/gen-efficientnet-pytorch",
+            "tf_efficientnet_lite3",
+            pretrained=False,
+            skip_validation=True,
+        )
+        wipe_sys_modules(sys.modules["geffnet"])
+
+        # The default implementation creates the self.pad layer within the
+        # forward function itself, which makes it untraceable by aimet.
+        find_replace_in_repo(
+            repo_root,
+            "hub/rwightman_gen-efficientnet-pytorch_master/geffnet/conv2d_layers.py",
+            "self.pad = None",
+            "self.pad = nn.ZeroPad2d(_same_pad_arg((int(os.environ['height']), int(os.environ['width'])), self.weight.shape[-2:], self.stride, self.dilation))",
+        )
+        find_replace_in_repo(
+            repo_root,
+            "hub/rwightman_gen-efficientnet-pytorch_master/geffnet/conv2d_layers.py",
+            "import math",
+            "import math; import os",
+        )
+
+        from hubconf import MiDaS_small
+
+        model = MiDaS_small(pretrained=False)
+        weights = load_torch(weights)
+        model.load_state_dict(weights)
         return cls(model)

     @staticmethod
-    def get_input_spec(height: int = 256, width: int = 256) -> InputSpec:
+    def get_input_spec(
+        height: int = DEFAULT_HEIGHT, width: int = DEFAULT_WIDTH
+    ) -> InputSpec:
         return {"image": ((1, 3, height, width), "float32")}

+    def _get_input_spec_for_instance(self) -> InputSpec:
+        return self.__class__.get_input_spec(self.height, self.width)
+
     def forward(self, image):
         """
         Runs the model on an image tensor and returns a tensor of depth estimates
diff --git a/qai_hub_models/models/midas/perf.yaml b/qai_hub_models/models/midas/perf.yaml
index 1d16582c..047b62cc 100644
--- a/qai_hub_models/models/midas/perf.yaml
+++ b/qai_hub_models/models/midas/perf.yaml
@@ -36,11 +36,11 @@ models:
 - name: Midas-V2
   performance_metrics:
   - torchscript_onnx_tflite:
-      inference_time: 3425.0
-      throughput: 291.97080291970804
+      inference_time: 3428.0
+      throughput: 291.71528588098016
       estimated_peak_memory_range:
-        min: 16384
-        max: 2561720
+        min: 12288
+        max: 2878504
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -48,14 +48,14 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 139
-      job_id: jo5mzkx7p
+      job_id: jlpe4kq05
       job_status: Passed
     torchscript_onnx_qnn:
-      inference_time: 3375.0
-      throughput: 296.2962962962963
+      inference_time: 3372.0
+      throughput: 296.55990510083035
       estimated_peak_memory_range:
-        min: 16384
-        max: 18697592
+        min: 806912
+        max: 11534808
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -63,14 +63,14 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 199
-      job_id: jep2mdy65
+      job_id: jmg99wrvg
       job_status: Passed
     torchscript_onnx_ort:
-      inference_time: 3586.0
-      throughput: 278.8622420524261
+      inference_time: 3451.0
+      throughput: 289.77108084613155
       estimated_peak_memory_range:
         min: 12288
-        max: 158594312
+        max: 177641176
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -78,7 +78,7 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 199
-      job_id: jn5q216e5
+      job_id: jqp4jvzlp
       job_status: Passed
     reference_device_info:
       name: Samsung Galaxy S23
@@ -87,13 +87,13 @@ models:
       os_name: Android
       manufacturer: Samsung
       chipset: Snapdragon® 8 Gen 2
-    timestamp: '2024-05-29T18:59:41.614171Z'
+    timestamp: '2024-06-08T22:47:43Z'
   - torchscript_onnx_tflite:
-      inference_time: 2439.0
-      throughput: 410.0041000410004
+      inference_time: 2407.0
+      throughput: 415.45492314083924
       estimated_peak_memory_range:
-        min: 16384
-        max: 78764672
+        min: 12288
+        max: 82857536
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -101,14 +101,14 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 139
-      job_id: jegneqvjg
+      job_id: jygzvr66p
       job_status: Passed
     torchscript_onnx_qnn:
-      inference_time: 2405.0
-      throughput: 415.8004158004158
+      inference_time: 2404.0
+      throughput: 415.97337770382694
       estimated_peak_memory_range:
         min: 802816
-        max: 64511072
+        max: 65062640
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -116,14 +116,14 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 199
-      job_id: j2p0r9z0p
+      job_id: jnp1qe9lg
       job_status: Passed
     torchscript_onnx_ort:
-      inference_time: 2424.0
-      throughput: 412.54125412541254
+      inference_time: 2416.0
+      throughput: 413.9072847682119
       estimated_peak_memory_range:
-        min: 802816
-        max: 41121248
+        min: 389120
+        max: 38273760
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -131,7 +131,7 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 199
-      job_id: j1glk8v2p
+      job_id: j0pxeyw95
       job_status: Passed
     reference_device_info:
       name: Samsung Galaxy S24
@@ -140,13 +140,13 @@ models:
       os_name: Android
       manufacturer: Samsung
       chipset: Snapdragon® 8 Gen 3
-    timestamp: '2024-05-29T18:59:41.614261Z'
+    timestamp: '2024-06-08T22:47:44Z'
   - torchscript_onnx_tflite:
-      inference_time: 3433.0
-      throughput: 291.29041654529567
+      inference_time: 3435.0
+      throughput: 291.1208151382824
       estimated_peak_memory_range:
         min: 16384
-        max: 2080552
+        max: 2408992
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -154,14 +154,14 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 139
-      job_id: jopryd3kg
+      job_id: jz5wmqkjg
       job_status: Passed
     torchscript_onnx_qnn:
-      inference_time: 3365.0
-      throughput: 297.1768202080238
+      inference_time: 3369.0
+      throughput: 296.8239833778569
       estimated_peak_memory_range:
-        min: 12288
-        max: 18671464
+        min: 802816
+        max: 11302408
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -169,7 +169,7 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 199
-      job_id: jogky0evp
+      job_id: jz57vxqr5
       job_status: Passed
     reference_device_info:
       name: QCS8550 (Proxy)
@@ -178,10 +178,10 @@ models:
       os_name: Android
       manufacturer: Qualcomm
       chipset: Qcs8550
-    timestamp: '2024-05-29T18:59:41.614318Z'
+    timestamp: '2024-06-08T22:47:42Z'
   - torchscript_onnx_qnn:
-      inference_time: 3591.0
-      throughput: 278.473962684489
+      inference_time: 3529.0
+      throughput: 283.36639274582035
       estimated_peak_memory_range:
         min: 786432
         max: 786432
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -192,14 +192,14 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 199
-      job_id: j1p87rqq5
+      job_id: jvgd7oklg
       job_status: Passed
     torchscript_onnx_ort:
-      inference_time: 3422.0
-      throughput: 292.22676797194623
+      inference_time: 3447.0
+      throughput: 290.1073397156948
       estimated_peak_memory_range:
-        min: 34041856
-        max: 34041856
+        min: 9965568
+        max: 9965568
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -207,7 +207,7 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 199
-      job_id: jw561mynp
+      job_id: jo5mv3jq5
       job_status: Passed
     reference_device_info:
       name: Snapdragon X Elite CRD
@@ -216,4 +216,4 @@ models:
       os_name: Windows
       manufacturer: Qualcomm
       chipset: Snapdragon® X Elite
-    timestamp: '2024-05-29T18:59:41.614384Z'
+    timestamp: '2024-06-08T22:47:45Z'
diff --git a/qai_hub_models/models/midas/requirements.txt b/qai_hub_models/models/midas/requirements.txt
new file mode 100644
index 00000000..70fad942
--- /dev/null
+++ b/qai_hub_models/models/midas/requirements.txt
@@ -0,0 +1 @@
+timm==1.0.3
diff --git a/qai_hub_models/models/mnasnet05/README.md b/qai_hub_models/models/mnasnet05/README.md
index f17444f4..ab0d56a1 100644
--- a/qai_hub_models/models/mnasnet05/README.md
+++ b/qai_hub_models/models/mnasnet05/README.md
@@ -43,7 +43,7 @@
 script requires access to Deployment instructions for Qualcomm® AI Hub.

 ## License
 - The license for the original implementation of MNASNet05 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE).
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url})
+- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf)

 ## References
 * [MnasNet: Platform-Aware Neural Architecture Search for Mobile](https://arxiv.org/abs/1807.11626)
diff --git a/qai_hub_models/models/mnasnet05/evaluate.py b/qai_hub_models/models/mnasnet05/evaluate.py
new file mode 100644
index 00000000..c6de56aa
--- /dev/null
+++ b/qai_hub_models/models/mnasnet05/evaluate.py
@@ -0,0 +1,54 @@
+# ---------------------------------------------------------------------
+# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+# ---------------------------------------------------------------------
+# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY.
+
+
+from __future__ import annotations
+
+import warnings
+
+import qai_hub as hub
+
+from qai_hub_models.models.mnasnet05 import MODEL_ID, Model
+from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs
+from qai_hub_models.utils.evaluate import evaluate_on_dataset
+from qai_hub_models.utils.inference import compile_model_from_args
+
+SUPPORTED_DATASETS = ["imagenette", "imagenet"]
+
+
+def main():
+    warnings.filterwarnings("ignore")
+    parser = evaluate_parser(
+        model_cls=Model,
+        default_split_size=2500,
+        supported_datasets=SUPPORTED_DATASETS,
+    )
+    args = parser.parse_args()
+    args.device = None
+
+    if args.hub_model_id is not None:
+        hub_model = hub.get_model(args.hub_model_id)
+    else:
+        hub_model = compile_model_from_args(
+            MODEL_ID, args, get_model_kwargs(Model, vars(args))
+        )
+    hub_device = get_hub_device(None, args.chipset)
+    torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args)))
+    evaluate_on_dataset(
+        hub_model,
+        torch_model,
+        hub_device,
+        args.dataset_name,
+        args.split_size,
+        args.num_samples,
+        args.seed,
+        args.profile_options,
+        args.use_cache,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/qai_hub_models/models/mnasnet05/export.py b/qai_hub_models/models/mnasnet05/export.py
index 389bfe46..ed49880c 100644
--- a/qai_hub_models/models/mnasnet05/export.py
+++ b/qai_hub_models/models/mnasnet05/export.py
@@ -188,7 +188,7 @@ def export_model(
         target_runtime_extension = "so"
     elif target_runtime == TargetRuntime.TFLITE:
         target_runtime_extension = "tflite"
-    elif target_runtime == TargetRuntime.ORT:
+    elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}:
         target_runtime_extension = "onnx"

     os.makedirs(output_path, exist_ok=True)
diff --git a/qai_hub_models/models/mnasnet05/info.yaml b/qai_hub_models/models/mnasnet05/info.yaml
index c3ce91e0..1f87a21c 100644
--- a/qai_hub_models/models/mnasnet05/info.yaml
+++ b/qai_hub_models/models/mnasnet05/info.yaml
@@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License
 dataset:
   - imagenet-1k
   - imagenet-22k
+labels_file: imagenet_labels.txt
diff --git a/qai_hub_models/models/mnasnet05/perf.yaml b/qai_hub_models/models/mnasnet05/perf.yaml
index 7f636ea6..0ca342e3 100644
--- a/qai_hub_models/models/mnasnet05/perf.yaml
+++ b/qai_hub_models/models/mnasnet05/perf.yaml
@@ -36,11 +36,11 @@ models:
 -
name: MNASNet05 performance_metrics: - torchscript_onnx_tflite: - inference_time: 781.0 - throughput: 1280.4097311139565 + inference_time: 782.0 + throughput: 1278.772378516624 estimated_peak_memory_range: - min: 24576 - max: 1909392 + min: 20480 + max: 1961704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: j7gjlyv1p + job_id: jlpe4kj05 job_status: Passed torchscript_onnx_qnn: inference_time: 826.0 throughput: 1210.6537530266344 estimated_peak_memory_range: - min: 12288 - max: 35312208 + min: 618496 + max: 5537568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jz5w9ze4p + job_id: jmg99w6vg job_status: Passed torchscript_onnx_ort: - inference_time: 763.0 - throughput: 1310.615989515072 + inference_time: 798.0 + throughput: 1253.1328320802006 estimated_peak_memory_range: min: 12288 - max: 18924944 + max: 155086488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jz5w9zezp + job_id: jqp4jvqlp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.648776Z' + timestamp: '2024-06-08T22:49:16Z' - torchscript_onnx_tflite: - inference_time: 523.0 - throughput: 1912.0458891013384 + inference_time: 546.0 + throughput: 1831.5018315018315 estimated_peak_memory_range: - min: 16384 - max: 45982896 + min: 12288 + max: 46076672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jlpevxd85 + job_id: jygzvr16p job_status: Passed torchscript_onnx_qnn: - inference_time: 563.0 - throughput: 1776.1989342806394 + inference_time: 564.0 + throughput: 1773.049645390071 estimated_peak_memory_range: min: 0 - max: 39385456 + max: 41703392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jmg942lm5 + job_id: jnp1qerlg job_status: Passed torchscript_onnx_ort: - inference_time: 551.0 - throughput: 1814.8820326678765 + inference_time: 560.0 + throughput: 1785.7142857142858 estimated_peak_memory_range: - min: 618496 - max: 28569088 + min: 31727616 + max: 59957408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jmg942lq5 + job_id: j0pxeyv95 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.648842Z' + timestamp: '2024-06-08T22:49:17Z' - torchscript_onnx_tflite: - inference_time: 780.0 - throughput: 1282.051282051282 + inference_time: 773.0 + throughput: 1293.6610608020699 estimated_peak_memory_range: min: 12288 - max: 1638504 + max: 159861568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jygz7y34p + job_id: jz5wmqjjg job_status: Passed torchscript_onnx_qnn: - inference_time: 823.0 - throughput: 1215.0668286755772 + inference_time: 826.0 + throughput: 1210.6537530266344 estimated_peak_memory_range: - min: 28672 - max: 34400464 + min: 16384 + max: 14027976 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jvgdv4x6g + job_id: jz57vxzr5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.648882Z' + timestamp: '2024-06-08T22:49:15Z' - torchscript_onnx_qnn: - inference_time: 941.0 - throughput: 1062.6992561105208 + inference_time: 946.0 + throughput: 1057.0824524312895 estimated_peak_memory_range: - min: 602112 - max: 602112 + min: 606208 + max: 606208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jnp1814ng + job_id: jvgd7ojlg job_status: Passed torchscript_onnx_ort: - inference_time: 819.0 - throughput: 1221.001221001221 + inference_time: 807.0 + throughput: 1239.1573729863692 estimated_peak_memory_range: - min: 19140608 - max: 19140608 + min: 18001920 + max: 18001920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jnp1814kg + job_id: jo5mv3rq5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.648928Z' + timestamp: '2024-06-08T22:49:18Z' diff --git a/qai_hub_models/models/mobilenet_v2/README.md b/qai_hub_models/models/mobilenet_v2/README.md index 4c9f4616..7426d634 100644 --- a/qai_hub_models/models/mobilenet_v2/README.md +++ b/qai_hub_models/models/mobilenet_v2/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MobileNet-v2 can be found [here](https://github.com/tonylins/pytorch-mobilenet-v2/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) diff --git a/qai_hub_models/models/mobilenet_v2/evaluate.py b/qai_hub_models/models/mobilenet_v2/evaluate.py new file mode 100644 index 00000000..63c5f4a4 --- /dev/null +++ b/qai_hub_models/models/mobilenet_v2/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.mobilenet_v2 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/mobilenet_v2/export.py b/qai_hub_models/models/mobilenet_v2/export.py index 4162e418..3201e32a 100644 --- a/qai_hub_models/models/mobilenet_v2/export.py +++ b/qai_hub_models/models/mobilenet_v2/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mobilenet_v2/info.yaml b/qai_hub_models/models/mobilenet_v2/info.yaml index 693da4bb..181e3f7b 100644 --- a/qai_hub_models/models/mobilenet_v2/info.yaml +++ b/qai_hub_models/models/mobilenet_v2/info.yaml @@ -42,3 +42,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/mobilenet_v2/perf.yaml b/qai_hub_models/models/mobilenet_v2/perf.yaml index dfb984fb..bdd64620 100644 --- a/qai_hub_models/models/mobilenet_v2/perf.yaml +++ b/qai_hub_models/models/mobilenet_v2/perf.yaml @@ -36,11 +36,11 @@ models: - name: MobileNet-v2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 938.0 - throughput: 1066.0980810234541 + inference_time: 940.0 + throughput: 1063.8297872340424 estimated_peak_memory_range: - min: 16384 - max: 1921016 + min: 57344 + max: 1721784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jz57dnyq5 + job_id: jopr1ekeg job_status: Passed torchscript_onnx_qnn: - inference_time: 1263.0 - throughput: 791.7656373713381 + inference_time: 1266.0 + throughput: 789.8894154818325 estimated_peak_memory_range: min: 622592 - max: 149245264 + max: 53135336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jo5mzknyp + job_id: j2p0elye5 job_status: Passed torchscript_onnx_ort: - inference_time: 974.0 - throughput: 1026.694045174538 + inference_time: 938.0 + throughput: 1066.0980810234541 estimated_peak_memory_range: - min: 12288 - max: 34226088 + min: 16384 + max: 21567360 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jqpyd2zrp + job_id: j1gle3nlp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.683566Z' + timestamp: '2024-06-08T22:49:43Z' - torchscript_onnx_tflite: - inference_time: 642.0 - throughput: 1557.632398753894 + inference_time: 643.0 + throughput: 1555.2099533437015 estimated_peak_memory_range: - min: 12288 - max: 56648848 + min: 0 + max: 58244480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jqp4w4lqg + job_id: jep23l8mg job_status: Passed torchscript_onnx_qnn: - inference_time: 829.0 - throughput: 1206.2726176115802 + inference_time: 826.0 + throughput: 1210.6537530266344 estimated_peak_memory_range: min: 618496 - max: 39436848 + max: 40424432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jegneq0vg + job_id: j1p8wzo8p job_status: Passed torchscript_onnx_ort: - inference_time: 667.0 - throughput: 1499.2503748125937 + inference_time: 666.0 + throughput: 1501.5015015015015 estimated_peak_memory_range: - min: 618496 - max: 26671744 + min: 487424 + max: 27269952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j2p0r942p + job_id: jw56qn67g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.683632Z' + timestamp: '2024-06-08T22:49:44Z' - torchscript_onnx_tflite: - inference_time: 939.0 - throughput: 1064.9627263045793 + inference_time: 941.0 + throughput: 1062.6992561105208 estimated_peak_memory_range: - min: 28672 - max: 2045656 + min: 20480 + max: 1483664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: j0px1rkjg + job_id: jqpyv6e4p job_status: Passed torchscript_onnx_qnn: - inference_time: 1270.0 - throughput: 787.4015748031496 + inference_time: 1272.0 + throughput: 786.1635220125786 estimated_peak_memory_range: - min: 24576 - max: 51022400 + min: 618496 + max: 41687968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jep2mdxx5 + job_id: jn5q938mp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.683673Z' + timestamp: '2024-06-08T22:49:42Z' - torchscript_onnx_qnn: - inference_time: 1557.0 - throughput: 642.2607578676943 + inference_time: 1555.0 + throughput: 643.0868167202573 estimated_peak_memory_range: - min: 602112 - max: 602112 + min: 1355776 + max: 1355776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jopryd6vg + job_id: jogkr3zo5 job_status: Passed torchscript_onnx_ort: - inference_time: 1003.0 - throughput: 997.0089730807578 + inference_time: 987.0 + throughput: 1013.1712259371834 estimated_peak_memory_range: - min: 4685824 - max: 4685824 + min: 5607424 + max: 5607424 primary_compute_unit: NPU precision: 
fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j1p87r2z5 + job_id: j1p3qekz5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.683718Z' + timestamp: '2024-06-08T22:49:45Z' diff --git a/qai_hub_models/models/mobilenet_v2_quantized/README.md b/qai_hub_models/models/mobilenet_v2_quantized/README.md index e8db3017..c2ca082f 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/README.md +++ b/qai_hub_models/models/mobilenet_v2_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MobileNet-v2-Quantized can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) diff --git a/qai_hub_models/models/mobilenet_v2_quantized/evaluate.py b/qai_hub_models/models/mobilenet_v2_quantized/evaluate.py new file mode 100644 index 00000000..76dd0581 --- /dev/null +++ b/qai_hub_models/models/mobilenet_v2_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.mobilenet_v2_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/mobilenet_v2_quantized/export.py b/qai_hub_models/models/mobilenet_v2_quantized/export.py index 231631ce..b025f312 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/export.py +++ b/qai_hub_models/models/mobilenet_v2_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mobilenet_v2_quantized/info.yaml b/qai_hub_models/models/mobilenet_v2_quantized/info.yaml index 302fcc0a..973d4b15 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/info.yaml +++ b/qai_hub_models/models/mobilenet_v2_quantized/info.yaml @@ -42,3 +42,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml b/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml index 499836d3..f7621f54 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml +++ b/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: MobileNet-v2-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 294.0 - throughput: 3401.360544217687 + inference_time: 291.0 + throughput: 3436.426116838488 estimated_peak_memory_range: - min: 12288 - max: 1579192 + min: 53248 + max: 1718392 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jn5q21075 + job_id: j1pvzv3mg job_status: Passed torchscript_onnx_qnn: - inference_time: 658.0 - throughput: 1519.756838905775 + inference_time: 647.0 + throughput: 1545.595054095827 estimated_peak_memory_range: - min: 12288 - max: 7011992 + min: 45056 + max: 16933008 
primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: j1pvwmq7g + job_id: jmg99wvvg job_status: Passed torchscript_onnx_ort: - inference_time: 640.0 - throughput: 1562.5 + inference_time: 549.0 + throughput: 1821.4936247723133 estimated_peak_memory_range: - min: 32768 - max: 53423304 + min: 12288 + max: 22837192 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 77 + layers_on_npu: 74 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 77 - job_id: jmg9420q5 + total_layers: 74 + job_id: jmg99w1lg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.718178Z' + timestamp: '2024-06-08T22:50:23Z' - torchscript_onnx_tflite: - inference_time: 207.0 - throughput: 4830.917874396136 + inference_time: 215.0 + throughput: 4651.162790697675 estimated_peak_memory_range: min: 12288 - max: 37431712 + max: 38045216 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: j1glk84ep + job_id: j7gjkex85 job_status: Passed torchscript_onnx_qnn: - inference_time: 475.0 - throughput: 2105.2631578947367 + inference_time: 474.0 + throughput: 2109.7046413502107 estimated_peak_memory_range: min: 163840 - max: 35567776 + max: 38345936 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: j7gjlyd7p + job_id: jnp1qe0lg job_status: Passed torchscript_onnx_ort: - inference_time: 485.0 - throughput: 2061.855670103093 + inference_time: 395.0 + throughput: 2531.6455696202534 estimated_peak_memory_range: - min: 0 - max: 21634080 + min: 12288 + max: 23651472 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 77 + layers_on_npu: 74 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 77 - job_id: jnp1812kg + total_layers: 74 + job_id: jnp1qel2g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.718236Z' + timestamp: '2024-06-08T22:50:24Z' - torchscript_onnx_tflite: - inference_time: 317.0 - throughput: 3154.5741324921137 + inference_time: 301.0 + throughput: 3322.2591362126245 estimated_peak_memory_range: min: 12288 - max: 1549360 + max: 1685448 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jw561m2vp + job_id: jlpe4k905 job_status: Passed torchscript_onnx_qnn: - inference_time: 653.0 - throughput: 1531.3935681470139 + inference_time: 654.0 + throughput: 1529.051987767584 estimated_peak_memory_range: - min: 24576 - max: 5875912 + min: 16384 + max: 123157128 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jygz7y2zp + job_id: jz5wmqv6g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.718274Z' + timestamp: '2024-06-08T22:50:22Z' - torchscript_onnx_tflite: - inference_time: 825.0 - throughput: 1212.121212121212 + inference_time: 850.0 + throughput: 1176.4705882352941 estimated_peak_memory_range: min: 12288 - max: 23357440 + 
max: 24025456 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: j1p3m7nxg - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1469.0 - throughput: 680.7351940095303 - estimated_peak_memory_range: - min: 315392 - max: 35338656 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 71 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 71 - job_id: jz5w9zwzp + job_id: jygzvre6p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:41.718310Z' + timestamp: '2024-06-08T22:50:16Z' - torchscript_onnx_tflite: - inference_time: 7302.0 - throughput: 136.9487811558477 + inference_time: 7601.0 + throughput: 131.56163662675965 estimated_peak_memory_range: - min: 118784 - max: 6739360 + min: 253952 + max: 8158832 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 2 layers_on_cpu: 0 total_layers: 72 - job_id: jwgovwz45 + job_id: jz5wmqojg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:41.718332Z' + timestamp: '2024-06-08T22:50:17Z' - torchscript_onnx_qnn: - inference_time: 771.0 - throughput: 1297.0168612191958 + inference_time: 740.0 + throughput: 1351.3513513513512 estimated_peak_memory_range: - min: 610304 - max: 610304 + min: 696320 + max: 696320 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jlpevxo75 + job_id: jvgd7owlg job_status: Passed torchscript_onnx_ort: - inference_time: 762.0 - throughput: 1312.3359580052493 + inference_time: 554.0 + throughput: 1805.0541516245487 estimated_peak_memory_range: - min: 19554304 - max: 19554304 + min: 20283392 + max: 20283392 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 77 + layers_on_npu: 74 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 77 - job_id: jvgdv4nkg + total_layers: 74 + job_id: jvgd7o9eg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.718370Z' + timestamp: '2024-06-08T22:50:25Z' diff --git a/qai_hub_models/models/mobilenet_v3_large/README.md b/qai_hub_models/models/mobilenet_v3_large/README.md index bf675b22..3084f4fb 100644 --- a/qai_hub_models/models/mobilenet_v3_large/README.md +++ b/qai_hub_models/models/mobilenet_v3_large/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MobileNet-v3-Large can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) diff --git a/qai_hub_models/models/mobilenet_v3_large/evaluate.py b/qai_hub_models/models/mobilenet_v3_large/evaluate.py new file mode 100644 index 00000000..919ac111 --- /dev/null +++ b/qai_hub_models/models/mobilenet_v3_large/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.mobilenet_v3_large import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/mobilenet_v3_large/export.py b/qai_hub_models/models/mobilenet_v3_large/export.py index 40b17a01..da4d660d 100644 --- a/qai_hub_models/models/mobilenet_v3_large/export.py +++ b/qai_hub_models/models/mobilenet_v3_large/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mobilenet_v3_large/info.yaml b/qai_hub_models/models/mobilenet_v3_large/info.yaml index be6173d9..d276b6db 100644 --- a/qai_hub_models/models/mobilenet_v3_large/info.yaml +++ b/qai_hub_models/models/mobilenet_v3_large/info.yaml @@ -41,3 +41,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/mobilenet_v3_large/perf.yaml b/qai_hub_models/models/mobilenet_v3_large/perf.yaml index c0d437b8..8ea50f02 100644 --- a/qai_hub_models/models/mobilenet_v3_large/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_large/perf.yaml @@ -36,11 +36,11 @@ models: - name: MobileNet-v3-Large performance_metrics: - torchscript_onnx_tflite: - inference_time: 1003.0 - throughput: 
997.0089730807578 + inference_time: 999.0 + throughput: 1001.001001001001 estimated_peak_memory_range: - min: 24576 - max: 17861680 + min: 16384 + max: 1600024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 136 - job_id: jqp4w4nqg + job_id: jqp4jvovp job_status: Passed torchscript_onnx_qnn: - inference_time: 1040.0 - throughput: 961.5384615384615 + inference_time: 1048.0 + throughput: 954.1984732824427 estimated_peak_memory_range: - min: 16384 - max: 57725416 + min: 647168 + max: 48048184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jegneqlvg + job_id: jegnr3yr5 job_status: Passed torchscript_onnx_ort: - inference_time: 1026.0 - throughput: 974.6588693957115 + inference_time: 1039.0 + throughput: 962.4639076034649 estimated_peak_memory_range: min: 12288 - max: 51031736 + max: 82696432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 162 - job_id: j1p87r0z5 + job_id: j2p0elq65 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.761794Z' + timestamp: '2024-06-08T22:50:50Z' - torchscript_onnx_tflite: - inference_time: 678.0 - throughput: 1474.9262536873157 + inference_time: 703.0 + throughput: 1422.475106685633 estimated_peak_memory_range: min: 12288 - max: 61079392 + max: 62391952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 136 - job_id: j0px1r9jg + job_id: j0pxeyj15 job_status: Passed torchscript_onnx_qnn: - inference_time: 716.0 - throughput: 1396.6480446927374 + inference_time: 718.0 + throughput: 1392.757660167131 estimated_peak_memory_range: - min: 0 - max: 49042560 + min: 618496 + max: 51941056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jopryd8vg + job_id: jopr1eq9g job_status: Passed torchscript_onnx_ort: - inference_time: 738.0 - throughput: 1355.0135501355014 + inference_time: 713.0 + throughput: 1402.5245441795232 estimated_peak_memory_range: - min: 12288 - max: 28525824 + min: 618496 + max: 29120336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 162 - job_id: jogky07yp + job_id: j1p8wz9xp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.761876Z' + timestamp: '2024-06-08T22:50:51Z' - torchscript_onnx_tflite: - inference_time: 1002.0 - throughput: 998.003992015968 + inference_time: 1001.0 + throughput: 999.000999000999 estimated_peak_memory_range: - min: 28672 - max: 2042160 + min: 45056 + max: 1507408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 136 - job_id: jo5mzkeyp + job_id: jo5mv32w5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1035.0 - throughput: 966.1835748792271 + inference_time: 1042.0 + throughput: 959.6928982725528 estimated_peak_memory_range: - min: 618496 - max: 26767128 + min: 626688 + max: 69049656 primary_compute_unit: NPU precision: fp16 layer_info: 
@@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: j2p0r932p + job_id: jqpyv6w7p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.761928Z' + timestamp: '2024-06-08T22:50:49Z' - torchscript_onnx_qnn: - inference_time: 1213.0 - throughput: 824.4023083264633 + inference_time: 1199.0 + throughput: 834.0283569641368 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jqpyd2rrp + job_id: jep23l64g job_status: Passed torchscript_onnx_ort: - inference_time: 1116.0 - throughput: 896.0573476702509 + inference_time: 1086.0 + throughput: 920.8103130755064 estimated_peak_memory_range: - min: 62279680 - max: 62279680 + min: 51040256 + max: 51040256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 162 - job_id: jn5q21e75 + job_id: jogkr3n25 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.761984Z' + timestamp: '2024-06-08T22:50:52Z' diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/README.md b/qai_hub_models/models/mobilenet_v3_large_quantized/README.md index b7ace7ce..1feab19d 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/README.md +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MobileNet-v3-Large-Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/evaluate.py b/qai_hub_models/models/mobilenet_v3_large_quantized/evaluate.py new file mode 100644 index 00000000..39314070 --- /dev/null +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.mobilenet_v3_large_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/export.py b/qai_hub_models/models/mobilenet_v3_large_quantized/export.py index ec90b231..7948d791 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/export.py +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/info.yaml b/qai_hub_models/models/mobilenet_v3_large_quantized/info.yaml index 9232ebd6..7d7507ae 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/info.yaml +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/info.yaml @@ -42,3 +42,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml b/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml index a7a39b53..b5cebb9b 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: MobileNet-v3-Large-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 359.0 - throughput: 2785.515320334262 + inference_time: 371.0 + throughput: 2695.4177897574123 estimated_peak_memory_range: - min: 12288 - max: 1354904 + min: 16384 + max: 1268000 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: jwgovwk45 + job_id: j1gle3z8p job_status: Passed torchscript_onnx_qnn: - inference_time: 628.0 - throughput: 1592.3566878980891 + inference_time: 622.0 + throughput: 1607.717041800643 
estimated_peak_memory_range: min: 16384 - max: 51512504 + max: 12184136 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jz5w9z2zp + job_id: j7gjkemx5 job_status: Passed torchscript_onnx_ort: - inference_time: 5349.0 - throughput: 186.9508319312021 + inference_time: 5186.0 + throughput: 192.8268414963363 estimated_peak_memory_range: - min: 18890752 - max: 36685080 + min: 18886656 + max: 272750360 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 150 + layers_on_npu: 147 layers_on_gpu: 0 layers_on_cpu: 24 - total_layers: 174 - job_id: jqp4w4kqg + total_layers: 171 + job_id: jmg99welg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.796254Z' + timestamp: '2024-06-08T22:51:35Z' - torchscript_onnx_tflite: inference_time: 255.0 throughput: 3921.5686274509803 estimated_peak_memory_range: min: 12288 - max: 47702144 + max: 48279952 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: j1pvwm07g + job_id: jw56qnj0g job_status: Passed torchscript_onnx_qnn: - inference_time: 456.0 - throughput: 2192.9824561403507 + inference_time: 451.0 + throughput: 2217.2949002217297 estimated_peak_memory_range: min: 163840 - max: 45563744 + max: 50970896 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jmg942jq5 + job_id: jlpe4k115 job_status: Passed torchscript_onnx_ort: - inference_time: 4341.0 - throughput: 230.361667818475 + inference_time: 4385.0 + throughput: 228.05017103762827 estimated_peak_memory_range: - min: 16400384 - max: 54777648 + min: 17133568 + max: 61050864 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 150 + layers_on_npu: 147 layers_on_gpu: 0 layers_on_cpu: 24 - total_layers: 174 - job_id: j0px1rnjg + total_layers: 171 + job_id: jnp1qex2g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.796335Z' + timestamp: '2024-06-08T22:51:36Z' - torchscript_onnx_tflite: inference_time: 353.0 throughput: 2832.8611898016998 estimated_peak_memory_range: min: 12288 - max: 1793632 + max: 2106960 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: j7gjlyz7p + job_id: j1p3qe3l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 627.0 - throughput: 1594.896331738437 + inference_time: 626.0 + throughput: 1597.444089456869 estimated_peak_memory_range: - min: 16384 - max: 15890856 + min: 184320 + max: 6302512 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jvgdv4ekg + job_id: jz5wmqn6g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.796386Z' + timestamp: '2024-06-08T22:51:34Z' - torchscript_onnx_tflite: - inference_time: 1160.0 - throughput: 862.0689655172414 + inference_time: 1170.0 + throughput: 854.7008547008547 estimated_peak_memory_range: min: 12288 - max: 28257056 + 
max: 28920160 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: jlpevxe75 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1608.0 - throughput: 621.8905472636816 - estimated_peak_memory_range: - min: 12288 - max: 49331408 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 126 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 126 - job_id: jz57dn0q5 + job_id: jwgoe30xp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:41.796435Z' + timestamp: '2024-06-08T22:51:29Z' - torchscript_onnx_tflite: - inference_time: 6893.0 - throughput: 145.0747134774409 + inference_time: 6878.0 + throughput: 145.39110206455365 estimated_peak_memory_range: min: 45056 - max: 7248032 + max: 2149272 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: jygz7yozp + job_id: j1pvzvojg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:41.796463Z' + timestamp: '2024-06-08T22:51:30Z' - torchscript_onnx_qnn: - inference_time: 713.0 - throughput: 1402.5245441795232 + inference_time: 716.0 + throughput: 1396.6480446927374 estimated_peak_memory_range: - min: 569344 - max: 569344 + min: 643072 + max: 643072 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jnp181ykg + job_id: jygzvr9kp job_status: Passed torchscript_onnx_ort: - inference_time: 4668.0 - throughput: 214.22450728363324 + inference_time: 4701.0 + throughput: 212.72069772388852 estimated_peak_memory_range: - min: 16908288 - max: 16908288 + min: 26042368 + max: 26042368 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 150 + layers_on_npu: 147 layers_on_gpu: 0 layers_on_cpu: 24 - total_layers: 174 - job_id: jo5mzkqyp + total_layers: 171 + job_id: jvgd7oleg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.796520Z' + timestamp: '2024-06-08T22:51:37Z' diff --git a/qai_hub_models/models/mobilenet_v3_small/README.md b/qai_hub_models/models/mobilenet_v3_small/README.md index 9d058839..bea9dec8 100644 --- a/qai_hub_models/models/mobilenet_v3_small/README.md +++ b/qai_hub_models/models/mobilenet_v3_small/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MobileNet-v3-Small can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) diff --git a/qai_hub_models/models/mobilenet_v3_small/evaluate.py b/qai_hub_models/models/mobilenet_v3_small/evaluate.py new file mode 100644 index 00000000..5ebb691f --- /dev/null +++ b/qai_hub_models/models/mobilenet_v3_small/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.mobilenet_v3_small import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/mobilenet_v3_small/export.py b/qai_hub_models/models/mobilenet_v3_small/export.py index 70b1d899..f7fe3203 100644 --- a/qai_hub_models/models/mobilenet_v3_small/export.py +++ b/qai_hub_models/models/mobilenet_v3_small/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mobilenet_v3_small/info.yaml b/qai_hub_models/models/mobilenet_v3_small/info.yaml index 8984b9c8..67c610bc 100644 --- a/qai_hub_models/models/mobilenet_v3_small/info.yaml +++ b/qai_hub_models/models/mobilenet_v3_small/info.yaml @@ -41,3 +41,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/mobilenet_v3_small/perf.yaml b/qai_hub_models/models/mobilenet_v3_small/perf.yaml index 7bf1b957..10857f0f 100644 --- a/qai_hub_models/models/mobilenet_v3_small/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_small/perf.yaml @@ -36,11 +36,11 @@ models: - name: MobileNet-v3-Small performance_metrics: - torchscript_onnx_tflite: - inference_time: 834.0 - throughput: 
1199.0407673860911 + inference_time: 835.0 + throughput: 1197.6047904191616 estimated_peak_memory_range: - min: 24576 - max: 1878808 + min: 16384 + max: 1873408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: jopryd2vg + job_id: jqp4jv0vp job_status: Passed torchscript_onnx_qnn: - inference_time: 866.0 - throughput: 1154.7344110854503 + inference_time: 882.0 + throughput: 1133.7868480725624 estimated_peak_memory_range: min: 16384 - max: 45639048 + max: 13725872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j2p0r922p + job_id: jegnr38r5 job_status: Passed torchscript_onnx_ort: - inference_time: 826.0 - throughput: 1210.6537530266344 + inference_time: 824.0 + throughput: 1213.5922330097087 estimated_peak_memory_range: min: 12288 - max: 75798856 + max: 57762312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j1glk82ep + job_id: j2p0el765 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.840059Z' + timestamp: '2024-06-08T22:52:03Z' - torchscript_onnx_tflite: - inference_time: 568.0 - throughput: 1760.5633802816901 + inference_time: 547.0 + throughput: 1828.1535648994516 estimated_peak_memory_range: - min: 16384 - max: 41065712 + min: 12288 + max: 42129856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: jep2md9x5 + job_id: j0pxey215 job_status: Passed torchscript_onnx_qnn: inference_time: 583.0 throughput: 1715.2658662092624 estimated_peak_memory_range: - min: 12288 - max: 43536656 + min: 0 + max: 47338784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j1p87rmz5 + job_id: jopr1ej9g job_status: Passed torchscript_onnx_ort: - inference_time: 554.0 - throughput: 1805.0541516245487 + inference_time: 586.0 + throughput: 1706.4846416382252 estimated_peak_memory_range: - min: 19972096 - max: 46699072 + min: 524288 + max: 27846320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jw561mzvp + job_id: j1p8wzvxp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.840135Z' + timestamp: '2024-06-08T22:52:04Z' - torchscript_onnx_tflite: - inference_time: 836.0 - throughput: 1196.1722488038276 + inference_time: 832.0 + throughput: 1201.923076923077 estimated_peak_memory_range: - min: 28672 - max: 1727808 + min: 24576 + max: 2336768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: jqpyd2jrp + job_id: jo5mv3yw5 job_status: Passed torchscript_onnx_qnn: - inference_time: 868.0 - throughput: 1152.073732718894 + inference_time: 867.0 + throughput: 1153.4025374855826 estimated_peak_memory_range: min: 12288 - max: 24180000 + max: 35394896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 
126 - job_id: jn5q21r75 + job_id: jqpyv607p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.840181Z' + timestamp: '2024-06-08T22:52:02Z' - torchscript_onnx_qnn: - inference_time: 1065.0 - throughput: 938.9671361502348 + inference_time: 1018.0 + throughput: 982.3182711198428 estimated_peak_memory_range: - min: 602112 - max: 602112 + min: 1249280 + max: 1249280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jogky0qyp + job_id: jep23ln4g job_status: Passed torchscript_onnx_ort: - inference_time: 871.0 - throughput: 1148.105625717566 + inference_time: 879.0 + throughput: 1137.6564277588168 estimated_peak_memory_range: - min: 16216064 - max: 16216064 + min: 16596992 + max: 16596992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j1p3m71xg + job_id: jogkr3m25 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.840231Z' + timestamp: '2024-06-08T22:52:05Z' diff --git a/qai_hub_models/models/openai_clip/README.md b/qai_hub_models/models/openai_clip/README.md index 06c429e8..0455ec79 100644 --- a/qai_hub_models/models/openai_clip/README.md +++ b/qai_hub_models/models/openai_clip/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of OpenAI-Clip can be found [here](https://github.com/openai/CLIP/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Learning Transferable Visual Models From Natural Language Supervision](https://arxiv.org/abs/2103.00020) diff --git a/qai_hub_models/models/openai_clip/export.py b/qai_hub_models/models/openai_clip/export.py index 4cc801ca..08ce7d6a 100644 --- a/qai_hub_models/models/openai_clip/export.py +++ b/qai_hub_models/models/openai_clip/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/openai_clip/perf.yaml b/qai_hub_models/models/openai_clip/perf.yaml index da7d3a0d..785f83c0 100644 --- a/qai_hub_models/models/openai_clip/perf.yaml +++ b/qai_hub_models/models/openai_clip/perf.yaml @@ -36,11 +36,11 @@ models: - name: CLIPTextEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 13251.0 - throughput: 75.4660025658441 + inference_time: 13293.0 + throughput: 75.22756337922215 estimated_peak_memory_range: min: 20480 - max: 3401864 + max: 3340864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 576 - job_id: j1pvwmr7g + job_id: j1gle3r8p job_status: Passed torchscript_onnx_qnn: - inference_time: 7849.0 - throughput: 127.40476493820869 + inference_time: 7810.0 + throughput: 128.04097311139566 estimated_peak_memory_range: - min: 16384 - max: 25143840 + min: 24576 + max: 31351376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 377 - job_id: jnp181wkg + job_id: jlpe4k315 job_status: Passed torchscript_onnx_ort: - inference_time: 31734.0 - throughput: 31.511943026407007 + inference_time: 31397.0 + throughput: 31.850176768481067 estimated_peak_memory_range: - min: 65536 - max: 333763384 + min: 57344 + max: 324810128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 389 - job_id: jep2mdlx5 + job_id: j0pxe1o15 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.874657Z' + timestamp: '2024-06-08T22:53:04Z' - torchscript_onnx_tflite: - inference_time: 9373.0 - throughput: 106.68942707777659 + inference_time: 9408.0 + throughput: 106.29251700680273 estimated_peak_memory_range: - min: 0 - max: 209589136 + min: 36864 + max: 211531120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 576 - job_id: jlpevxw75 + job_id: j1p3qe2l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5487.0 - throughput: 182.2489520685256 + inference_time: 5496.0 + throughput: 181.9505094614265 estimated_peak_memory_range: min: 12288 - max: 139438448 + max: 143518544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 377 - 
job_id: jz57dnlq5 + job_id: jz5wm966g job_status: Passed torchscript_onnx_ort: - inference_time: 22304.0 - throughput: 44.83500717360115 + inference_time: 22333.0 + throughput: 44.776787713249455 estimated_peak_memory_range: - min: 40960 - max: 187710464 + min: 36864 + max: 188583968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 389 - job_id: j2p0r9l2p + job_id: jegnrevr5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.874827Z' + timestamp: '2024-06-08T22:53:06Z' - torchscript_onnx_tflite: - inference_time: 13297.0 - throughput: 75.2049334436339 + inference_time: 13221.0 + throughput: 75.6372437788367 estimated_peak_memory_range: - min: 65536 - max: 3472688 + min: 40960 + max: 2903592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 576 - job_id: jz5w9z3zp + job_id: j1pvzvxjg job_status: Passed torchscript_onnx_qnn: - inference_time: 7821.0 - throughput: 127.86088735455824 + inference_time: 7775.0 + throughput: 128.61736334405145 estimated_peak_memory_range: - min: 20480 - max: 26256120 + min: 16384 + max: 18711280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 377 - job_id: jegneq3vg + job_id: jz57vdrl5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.874948Z' + timestamp: '2024-06-08T22:53:02Z' - torchscript_onnx_qnn: - inference_time: 8459.0 - throughput: 118.21728336682823 + inference_time: 8431.0 + throughput: 118.60989206499822 estimated_peak_memory_range: - min: 147456 - max: 147456 + min: 159744 + max: 159744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 377 - job_id: j0px1r6jg + job_id: jnp1q8z2g job_status: Passed torchscript_onnx_ort: - inference_time: 32955.0 - throughput: 30.344409042633895 + inference_time: 32547.0 + throughput: 30.724797984453254 estimated_peak_memory_range: - min: 332324864 - max: 332324864 + min: 40755200 + max: 40755200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 389 - job_id: jogky03yp + job_id: jep23my4g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,15 +216,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.875049Z' + timestamp: '2024-06-08T22:53:08Z' - name: CLIPImageEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 126637.0 - throughput: 7.896586305740029 + inference_time: 126539.0 + throughput: 7.902701933791163 estimated_peak_memory_range: - min: 135168 - max: 3687832 + min: 0 + max: 273708336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 576 - job_id: j7gjly27p + job_id: jw56qnl0g job_status: Passed torchscript_onnx_qnn: - inference_time: 50638.0 - throughput: 19.748015324459892 + inference_time: 50274.0 + throughput: 19.890997334606357 estimated_peak_memory_range: - min: 45056 - max: 64082216 + min: 126976 + max: 66170792 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -247,22 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 371 - job_id: jvgdv4qkg - job_status: Passed - torchscript_onnx_ort: - inference_time: 171916.0 - throughput: 5.8167942483538475 - estimated_peak_memory_range: - min: 16384 - max: 538222224 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 382 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 382 - job_id: jqpyd26rp + job_id: jygzvrkkp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -271,13 +256,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.875215Z' + timestamp: '2024-06-08T22:52:57Z' - torchscript_onnx_tflite: - inference_time: 96011.0 - throughput: 10.41547322702607 + inference_time: 96320.0 + throughput: 10.382059800664452 estimated_peak_memory_range: - min: 192512 - max: 749633296 + min: 188416 + max: 752672896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,14 +270,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 576 - job_id: jygz7yjzp + job_id: jwgoe3qxp job_status: Passed torchscript_onnx_qnn: - inference_time: 37501.0 - throughput: 26.665955574518012 + inference_time: 37784.0 + throughput: 26.46622909167902 estimated_peak_memory_range: - min: 655360 - max: 195743056 + min: 634880 + max: 197848448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -300,14 +285,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 371 - job_id: jqp4w4dqg + job_id: jmg994nlg job_status: Passed torchscript_onnx_ort: - inference_time: 131686.0 - throughput: 7.593821666691979 + inference_time: 129578.0 + throughput: 7.717359428298013 estimated_peak_memory_range: - min: 618496 - max: 1275686304 + min: 659456 + max: 1273480192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -315,7 +300,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: j1p87rzz5 + job_id: jopr1y39g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -324,13 +309,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.875378Z' + timestamp: '2024-06-08T22:53:07Z' - torchscript_onnx_tflite: - inference_time: 126479.0 - throughput: 7.906450873267499 + inference_time: 125864.0 + throughput: 7.945083582279286 estimated_peak_memory_range: - min: 0 - max: 274583072 + min: 143360 + max: 4010376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -338,14 +323,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 576 - job_id: jmg942yq5 + job_id: j7gjke4x5 job_status: Passed torchscript_onnx_qnn: - inference_time: 50667.0 - throughput: 19.736712258471982 + inference_time: 50577.0 + throughput: 19.771833046641753 estimated_peak_memory_range: - min: 65536 - max: 59760816 + min: 77824 + max: 66028648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -353,7 +338,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 371 - job_id: joprydevg + job_id: jqp4jwrvp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -362,10 +347,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.875496Z' + timestamp: '2024-06-08T22:53:03Z' - torchscript_onnx_qnn: - inference_time: 48879.0 - throughput: 20.458683688291494 + inference_time: 48611.0 + throughput: 20.57147559194421 estimated_peak_memory_range: min: 602112 max: 602112 @@ -376,14 +361,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 369 
- job_id: jo5mzk6yp + job_id: jvgd7v1eg job_status: Passed torchscript_onnx_ort: - inference_time: 168534.0 - throughput: 5.933520832591643 + inference_time: 168455.0 + throughput: 5.936303463833071 estimated_peak_memory_range: - min: 492019712 - max: 492019712 + min: 468086784 + max: 468086784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -391,7 +376,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: jn5q21375 + job_id: jqpyvd37p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -400,4 +385,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.875594Z' + timestamp: '2024-06-08T22:53:08Z' diff --git a/qai_hub_models/models/openpose/README.md b/qai_hub_models/models/openpose/README.md index f5b7c4f4..ad423618 100644 --- a/qai_hub_models/models/openpose/README.md +++ b/qai_hub_models/models/openpose/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of OpenPose can be found [here](https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [OpenPose: Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields](https://arxiv.org/abs/1812.08008) diff --git a/qai_hub_models/models/openpose/export.py b/qai_hub_models/models/openpose/export.py index c37d9c3d..18061fc5 100644 --- a/qai_hub_models/models/openpose/export.py +++ b/qai_hub_models/models/openpose/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/openpose/perf.yaml b/qai_hub_models/models/openpose/perf.yaml index acbfb747..30e2f47e 100644 --- a/qai_hub_models/models/openpose/perf.yaml +++ b/qai_hub_models/models/openpose/perf.yaml @@ -36,11 +36,11 @@ models: - name: OpenPose performance_metrics: - torchscript_onnx_tflite: - inference_time: 11695.0 - throughput: 85.50662676357418 + inference_time: 12008.0 + throughput: 83.27781479013991 estimated_peak_memory_range: - min: 200704 - max: 2684240 + min: 217088 + max: 2747920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: j1p3m7exg + job_id: jogkryx25 job_status: Passed torchscript_onnx_qnn: - inference_time: 11773.0 - throughput: 84.94011721736176 + inference_time: 11771.0 + throughput: 84.95454931611587 estimated_peak_memory_range: - min: 634880 - max: 241091432 + min: 45056 + max: 240267896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: j7gjlye7p + job_id: jw56q140g job_status: Passed torchscript_onnx_ort: - inference_time: 12281.0 - throughput: 81.4265939255761 + inference_time: 11936.0 + throughput: 83.78016085790885 estimated_peak_memory_range: - min: 2134016 - max: 399666784 + min: 0 + max: 
374382256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jmg942wq5 + job_id: j7gjklnx5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.938307Z' + timestamp: '2024-06-08T22:55:03Z' - torchscript_onnx_tflite: - inference_time: 8716.0 - throughput: 114.73152822395595 + inference_time: 8742.0 + throughput: 114.39029970258522 estimated_peak_memory_range: - min: 212992 - max: 35478464 + min: 12288 + max: 33837760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jwgovw345 + job_id: jn5q92q4p job_status: Passed torchscript_onnx_qnn: - inference_time: 8753.0 - throughput: 114.24654404204273 + inference_time: 8755.0 + throughput: 114.22044545973729 estimated_peak_memory_range: min: 618496 - max: 54462240 + max: 53012064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jlpevxk75 + job_id: j1p3qm0l5 job_status: Passed torchscript_onnx_ort: - inference_time: 9089.0 - throughput: 110.02310485201892 + inference_time: 9006.0 + throughput: 111.0370863868532 estimated_peak_memory_range: - min: 634880 - max: 30320816 + min: 700416 + max: 31196368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jnp181ekg + job_id: jlpe4vm15 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.938390Z' + timestamp: '2024-06-08T22:55:04Z' - torchscript_onnx_tflite: - inference_time: 11834.0 - throughput: 84.50228156160216 + inference_time: 11695.0 + throughput: 85.50662676357418 estimated_peak_memory_range: - min: 229376 - max: 2876192 + min: 196608 + max: 2975008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: j1pvwmv7g + job_id: j1glekm8p job_status: Passed torchscript_onnx_qnn: - inference_time: 11787.0 - throughput: 84.83922965979468 + inference_time: 11765.0 + throughput: 84.99787505312368 estimated_peak_memory_range: - min: 618496 - max: 240822560 + min: 12288 + max: 229599440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jz5w9zqzp + job_id: j1pvzwkjg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.938443Z' + timestamp: '2024-06-08T22:55:02Z' - torchscript_onnx_qnn: - inference_time: 14114.0 - throughput: 70.85163667280715 + inference_time: 14100.0 + throughput: 70.92198581560284 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jygz7yrzp + job_id: jwgoev6xp job_status: Passed torchscript_onnx_ort: - inference_time: 12351.0 - throughput: 80.96510404015869 + inference_time: 12365.0 + throughput: 80.87343307723413 estimated_peak_memory_range: - min: 93835264 - max: 93835264 + min: 88932352 + max: 88932352 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jvgdv4okg + job_id: jygzv7dkp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.938505Z' + timestamp: '2024-06-08T22:55:05Z' diff --git a/qai_hub_models/models/posenet_mobilenet/README.md b/qai_hub_models/models/posenet_mobilenet/README.md index ff11e607..8f4ea678 100644 --- a/qai_hub_models/models/posenet_mobilenet/README.md +++ b/qai_hub_models/models/posenet_mobilenet/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Posenet-Mobilenet can be found [here](https://github.com/rwightman/posenet-pytorch/blob/master/LICENSE.txt). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [PersonLab: Person Pose Estimation and Instance Segmentation with a Bottom-Up, Part-Based, Geometric Embedding Model](https://arxiv.org/abs/1803.08225) diff --git a/qai_hub_models/models/posenet_mobilenet/app.py b/qai_hub_models/models/posenet_mobilenet/app.py index 2ccca2f2..72933415 100644 --- a/qai_hub_models/models/posenet_mobilenet/app.py +++ b/qai_hub_models/models/posenet_mobilenet/app.py @@ -582,7 +582,7 @@ def predict_pose_keypoints( keypoint_scores, keypoint_coords, min_pose_score=0.25, - min_part_score=0.25, + min_part_score=0.1, ) image_result = Image.fromarray(output_arr) return pil_undo_resize_pad(image_result, original_size, scale, padding) diff --git a/qai_hub_models/models/posenet_mobilenet/export.py b/qai_hub_models/models/posenet_mobilenet/export.py index 9fa56461..e178937d 100644 --- a/qai_hub_models/models/posenet_mobilenet/export.py +++ b/qai_hub_models/models/posenet_mobilenet/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/posenet_mobilenet/info.yaml b/qai_hub_models/models/posenet_mobilenet/info.yaml index beaeaabb..eac8b0bd 100644 --- a/qai_hub_models/models/posenet_mobilenet/info.yaml +++ b/qai_hub_models/models/posenet_mobilenet/info.yaml @@ -16,7 +16,7 @@ deploy_license: source_repo: https://github.com/rwightman/posenet-pytorch technical_details: Model checkpoint: mobilenet_v1_101 - Input resolution: 257x193 + Input resolution: 513x257 Number of parameters: 3.31M Model size: 12.7 MB applicable_scenarios: @@ -33,7 +33,7 @@ related_models: - hrnet_pose has_static_banner: yes has_animated_banner: yes -license_type: other +license_type: apache-2.0 deploy_license_type: AI Model Hub License dataset: - coco diff --git a/qai_hub_models/models/posenet_mobilenet/perf.yaml b/qai_hub_models/models/posenet_mobilenet/perf.yaml index e55112e4..e405cdc0 100644 --- a/qai_hub_models/models/posenet_mobilenet/perf.yaml +++ b/qai_hub_models/models/posenet_mobilenet/perf.yaml @@ -39,8 +39,8 @@ models: inference_time: 1387.0 throughput: 
720.9805335255949 estimated_peak_memory_range: - min: 16384 - max: 1622952 + min: 12288 + max: 1654968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jnp181elg + job_id: jmg994llg job_status: Passed torchscript_onnx_qnn: - inference_time: 1436.0 - throughput: 696.3788300835655 + inference_time: 1439.0 + throughput: 694.9270326615705 estimated_peak_memory_range: - min: 12288 - max: 67879624 + min: 20480 + max: 24010176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jqp4w4wlg + job_id: jz57vdyl5 job_status: Passed torchscript_onnx_ort: - inference_time: 2081.0 - throughput: 480.5382027871216 + inference_time: 2086.0 + throughput: 479.3863854266539 estimated_peak_memory_range: min: 12288 - max: 19904304 + max: 25676680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: joprydyeg + job_id: jegnre6r5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.972787Z' + timestamp: '2024-06-08T22:55:33Z' - torchscript_onnx_tflite: - inference_time: 973.0 - throughput: 1027.749229188078 + inference_time: 977.0 + throughput: 1023.5414534288639 estimated_peak_memory_range: min: 12288 - max: 35518112 + max: 36616768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jvgdv4olg + job_id: jnp1q842g job_status: Passed torchscript_onnx_qnn: inference_time: 1010.0 throughput: 990.0990099009902 estimated_peak_memory_range: min: 1597440 - max: 33875456 + max: 36578000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: j0px1r19g + job_id: jqp4jwlvp job_status: Passed torchscript_onnx_ort: - inference_time: 1543.0 - throughput: 648.0881399870383 + inference_time: 1404.0 + throughput: 712.2507122507122 estimated_peak_memory_range: - min: 937984 - max: 21691776 + min: 1597440 + max: 24142448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jep2mdmm5 + job_id: jopr1yv9g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.972843Z' + timestamp: '2024-06-08T22:55:34Z' - torchscript_onnx_tflite: - inference_time: 1391.0 - throughput: 718.9072609633357 + inference_time: 1388.0 + throughput: 720.4610951008646 estimated_peak_memory_range: - min: 16384 - max: 3554688 + min: 12288 + max: 1476976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jz57dndr5 + job_id: jvgd7vxeg job_status: Passed torchscript_onnx_qnn: - inference_time: 1437.0 - throughput: 695.8942240779402 + inference_time: 1447.0 + throughput: 691.0850034554251 estimated_peak_memory_range: - min: 28672 - max: 13380040 + min: 16384 + max: 13954296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jegneqemg + job_id: jo5mvznw5 job_status: 
Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.972876Z' + timestamp: '2024-06-08T22:55:32Z' - torchscript_onnx_qnn: - inference_time: 1748.0 - throughput: 572.0823798627002 + inference_time: 1751.0 + throughput: 571.1022272986864 estimated_peak_memory_range: min: 1589248 max: 1589248 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jo5mzkzqp + job_id: j0pxe1k15 job_status: Passed torchscript_onnx_ort: - inference_time: 2133.0 - throughput: 468.8232536333802 + inference_time: 2129.0 + throughput: 469.7040864255519 estimated_peak_memory_range: - min: 159744 - max: 159744 + min: 151552 + max: 151552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jqpyd2d4p + job_id: jep23mk4g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.972913Z' + timestamp: '2024-06-08T22:55:35Z' diff --git a/qai_hub_models/models/protocols.py b/qai_hub_models/models/protocols.py index 1d79a391..ac1352af 100644 --- a/qai_hub_models/models/protocols.py +++ b/qai_hub_models/models/protocols.py @@ -33,10 +33,10 @@ from qai_hub_models.utils.input_spec import InputSpec FromPretrainedTypeVar = TypeVar("FromPretrainedTypeVar", bound="FromPretrainedProtocol") - FromPrecompiledTypeVar = TypeVar( "FromPrecompiledTypeVar", bound="FromPrecompiledProtocol" ) +HubModelProtocolTypeVar = TypeVar("HubModelProtocolTypeVar", bound="HubModelProtocol") class HubModelProtocol(Protocol): diff --git a/qai_hub_models/models/quicksrnetlarge/README.md b/qai_hub_models/models/quicksrnetlarge/README.md index 12c61b60..528f3c94 100644 --- a/qai_hub_models/models/quicksrnetlarge/README.md +++ b/qai_hub_models/models/quicksrnetlarge/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of QuickSRNetLarge can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms](https://arxiv.org/abs/2303.04336) diff --git a/qai_hub_models/models/quicksrnetlarge/demo.py b/qai_hub_models/models/quicksrnetlarge/demo.py index 12d688c3..70114874 100644 --- a/qai_hub_models/models/quicksrnetlarge/demo.py +++ b/qai_hub_models/models/quicksrnetlarge/demo.py @@ -3,16 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo -from qai_hub_models.models.quicksrnetlarge.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, - QuickSRNetLarge, -) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset - -IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetlarge_demo.jpg" -) +from qai_hub_models.models.quicksrnetlarge.model import MODEL_ID, QuickSRNetLarge # Run QuickSRNet end-to-end on a sample image. @@ -21,7 +12,6 @@ def main(is_test: bool = False): super_resolution_demo( model_cls=QuickSRNetLarge, model_id=MODEL_ID, - default_image=IMAGE_ADDRESS, is_test=is_test, ) diff --git a/qai_hub_models/models/quicksrnetlarge/export.py b/qai_hub_models/models/quicksrnetlarge/export.py index d21f0eab..5f3ec808 100644 --- a/qai_hub_models/models/quicksrnetlarge/export.py +++ b/qai_hub_models/models/quicksrnetlarge/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/quicksrnetlarge/info.yaml b/qai_hub_models/models/quicksrnetlarge/info.yaml index b9cc532d..b139e358 100644 --- a/qai_hub_models/models/quicksrnetlarge/info.yaml +++ b/qai_hub_models/models/quicksrnetlarge/info.yaml @@ -12,13 +12,14 @@ research_paper: https://arxiv.org/abs/2303.04336 research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms' license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf -deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: - Model checkpoint: quicksrnet_large_4x_checkpoint_float32 - Input resolution: 128x128 - Number of parameters: 436K - Model size: 1.67 MB + Model checkpoint: quicksrnet_large_3x_checkpoint + Input resolution: 640x360 + Number of parameters: 424K + Model size: 1.63 MB applicable_scenarios: - Virtual Real Estate Tours - Gaming diff --git a/qai_hub_models/models/quicksrnetlarge/model.py b/qai_hub_models/models/quicksrnetlarge/model.py index bac993cc..6a83e660 100644 --- a/qai_hub_models/models/quicksrnetlarge/model.py +++ 
b/qai_hub_models/models/quicksrnetlarge/model.py @@ -4,85 +4,48 @@ # --------------------------------------------------------------------- from __future__ import annotations -import torch +from pathlib import Path -from qai_hub_models.evaluators.base_evaluators import BaseEvaluator -from qai_hub_models.evaluators.superres_evaluator import SuperResolutionOutputEvaluator from qai_hub_models.models._shared.quicksrnet.common import ( _load_quicksrnet_source_model, ) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import BaseModel -from qai_hub_models.utils.input_spec import InputSpec +from qai_hub_models.models._shared.super_resolution.model import ( + DEFAULT_SCALE_FACTOR, + SuperResolutionModel, + validate_scale_factor, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_torch MODEL_ID = __name__.split(".")[-2] MODEL_ASSET_VERSION = 2 -# Weights and config stored in S3 are sourced from -# https://github.com/quic/aimet-model-zoo/blob/develop/aimet_zoo_torch/quicksrnet/model/model_cards/quicksrnet_large_4x_w8a8.json -# https://github.com/quic/aimet-model-zoo/releases/download/phase_2_january_artifacts/quicksrnet_large_4x_checkpoint_float32.pth.tar -QUICKSRNET_WEIGHTS = "quicksrnet_large_4x_checkpoint_float32.pth.tar" -SCALING_FACTOR = 4 +BASE_ASSET_URL = "https://github.com/quic/aimet-model-zoo/releases/download/phase_2_january_artifacts/quicksrnet_large_{scale_factor}x_checkpoint_float32.pth.tar" NUM_CHANNELS = 64 NUM_INTERMEDIATE_LAYERS = 11 -USE_ITO_CONNECTION = True - -class QuickSRNetLarge(BaseModel): - """Exportable QuickSRNet-Large upscaler, end-to-end.""" - def __init__( - self, - quicksrnet_model: torch.nn.Module, - ) -> None: - super().__init__() - self.model = quicksrnet_model +class QuickSRNetLarge(SuperResolutionModel): + """Exportable QuickSRNetLarge super resolution model, end-to-end.""" @classmethod - def from_pretrained(cls) -> QuickSRNetLarge: + def from_pretrained( + cls, scale_factor: int = DEFAULT_SCALE_FACTOR + ) -> QuickSRNetLarge: + validate_scale_factor(scale_factor) model = _load_quicksrnet_source_model( - SCALING_FACTOR, + scale_factor, NUM_CHANNELS, NUM_INTERMEDIATE_LAYERS, - USE_ITO_CONNECTION, + use_ito_connection=True, + ) + url = BASE_ASSET_URL.format(scale_factor=scale_factor) + checkpoint_asset = CachedWebModelAsset( + url, + MODEL_ID, + MODEL_ASSET_VERSION, + Path(url).name, ) - dst = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, QUICKSRNET_WEIGHTS - ).fetch() - checkpoint = torch.load(dst, map_location=torch.device("cpu")) + checkpoint = load_torch(checkpoint_asset) model.load_state_dict(checkpoint["state_dict"]) model.eval() - return cls(model) - - def get_evaluator(self) -> BaseEvaluator: - return SuperResolutionOutputEvaluator() - - def forward(self, image): - """ - Run QuickSRNet-Large on `image`, and produce an upscaled image - - Parameters: - image: Pixel values pre-processed for model consumption. - Range: float[0, 1] - 3-channel Color Space: RGB - - Returns: - image: Pixel values - Range: float[0, 1] - 3-channel Color Space: RGB - """ - - return self.model(image) - - @staticmethod - def get_input_spec( - batch_size: int = 1, - num_channels: int = 3, - height: int = 128, - width: int = 128, - ) -> InputSpec: - # Get the input specification ordered (name -> (shape, type)) pairs for this model. - # - # This can be used with the qai_hub python API to declare - # the model input specification upon submitting a profile job. 
- return {"image": ((batch_size, num_channels, height, width), "float32")} + return cls(model, scale_factor) diff --git a/qai_hub_models/models/quicksrnetlarge/perf.yaml b/qai_hub_models/models/quicksrnetlarge/perf.yaml index cb348dbc..4ca0e8f0 100644 --- a/qai_hub_models/models/quicksrnetlarge/perf.yaml +++ b/qai_hub_models/models/quicksrnetlarge/perf.yaml @@ -36,11 +36,11 @@ models: - name: QuickSRNetLarge performance_metrics: - torchscript_onnx_tflite: - inference_time: 2401.0 - throughput: 416.49312786339027 + inference_time: 2412.0 + throughput: 414.5936981757877 estimated_peak_memory_range: min: 28672 - max: 17275808 + max: 1429016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jogky0yop + job_id: j1gle1ojp job_status: Passed torchscript_onnx_qnn: - inference_time: 2092.0 - throughput: 478.0114722753346 + inference_time: 2108.0 + throughput: 474.3833017077799 estimated_peak_memory_range: - min: 212992 - max: 12407432 + min: 229376 + max: 5466776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jw561m17p + job_id: jwgoe4oqp job_status: Passed torchscript_onnx_ort: - inference_time: 2607.0 - throughput: 383.5826620636747 + inference_time: 2712.0 + throughput: 368.7315634218289 estimated_peak_memory_range: - min: 32768 - max: 85476552 + min: 16384 + max: 20834136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: j7gjlyl8p + job_id: jygzv44op job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.007134Z' + timestamp: '2024-06-11T11:56:08Z' - torchscript_onnx_tflite: - inference_time: 1797.0 - throughput: 556.4830272676684 + inference_time: 1740.0 + throughput: 574.7126436781609 estimated_peak_memory_range: min: 16384 - max: 28228496 + max: 29572928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jn5q212m5 + job_id: jw56qdr6g job_status: Passed torchscript_onnx_qnn: - inference_time: 1501.0 - throughput: 666.2225183211193 + inference_time: 1500.0 + throughput: 666.6666666666666 estimated_peak_memory_range: min: 204800 - max: 22080720 + max: 21850576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j1p3m7mzg + job_id: j1pvz99kg job_status: Passed torchscript_onnx_ort: - inference_time: 1873.0 - throughput: 533.9028296849973 + inference_time: 1855.0 + throughput: 539.0835579514825 estimated_peak_memory_range: - min: 217088 - max: 19951008 + min: 212992 + max: 19290704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jlpevxv05 + job_id: jz5wm113g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.007179Z' + timestamp: '2024-06-11T11:56:09Z' - torchscript_onnx_tflite: - inference_time: 2450.0 - throughput: 408.16326530612247 + inference_time: 2478.0 + throughput: 403.5512510088781 estimated_peak_memory_range: min: 24576 - max: 17617072 + max: 1690672 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: j1glk8klp + job_id: j1p3qwx35 job_status: Passed torchscript_onnx_qnn: - inference_time: 2090.0 - throughput: 478.4688995215311 + inference_time: 2101.0 + throughput: 475.9638267491671 estimated_peak_memory_range: - min: 12288 - max: 35001256 + min: 221184 + max: 5373456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j1pvwmwmg + job_id: jlpe4llo5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.007209Z' + timestamp: '2024-06-11T11:56:07Z' - torchscript_onnx_qnn: - inference_time: 2946.0 - throughput: 339.44331296673454 + inference_time: 2949.0 + throughput: 339.097999321804 estimated_peak_memory_range: - min: 253952 - max: 253952 + min: 204800 + max: 204800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jwgovwvd5 + job_id: j7gjkwwv5 job_status: Passed torchscript_onnx_ort: - inference_time: 2713.0 - throughput: 368.59565057132323 + inference_time: 2692.0 + throughput: 371.4710252600297 estimated_peak_memory_range: - min: 12627968 - max: 12627968 + min: 13115392 + max: 13115392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jygz7y76p + job_id: jmg99xxwg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.007239Z' + timestamp: '2024-06-11T11:56:10Z' diff --git a/qai_hub_models/models/quicksrnetlarge/test.py b/qai_hub_models/models/quicksrnetlarge/test.py index ad63526d..6f148c3c 100644 --- a/qai_hub_models/models/quicksrnetlarge/test.py +++ b/qai_hub_models/models/quicksrnetlarge/test.py @@ -5,7 +5,7 @@ import numpy as np from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models.quicksrnetlarge.demo import IMAGE_ADDRESS +from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS from qai_hub_models.models.quicksrnetlarge.demo import main as demo_main from qai_hub_models.models.quicksrnetlarge.model import ( MODEL_ASSET_VERSION, diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/README.md b/qai_hub_models/models/quicksrnetlarge_quantized/README.md index 71ddd422..025a873c 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/README.md +++ b/qai_hub_models/models/quicksrnetlarge_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of QuickSRNetLarge-Quantized can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms](https://arxiv.org/abs/2303.04336) diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/demo.py b/qai_hub_models/models/quicksrnetlarge_quantized/demo.py index 53d37094..1e090bfd 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/demo.py +++ b/qai_hub_models/models/quicksrnetlarge_quantized/demo.py @@ -4,22 +4,15 @@ # --------------------------------------------------------------------- from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo from qai_hub_models.models.quicksrnetlarge_quantized.model import ( - MODEL_ASSET_VERSION, MODEL_ID, QuickSRNetLargeQuantizable, ) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset - -IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnet_demo.jpg" -) def main(is_test: bool = False): super_resolution_demo( QuickSRNetLargeQuantizable, MODEL_ID, - default_image=IMAGE_ADDRESS, is_test=is_test, ) diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/export.py b/qai_hub_models/models/quicksrnetlarge_quantized/export.py index fcea80fa..4b832a72 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/export.py +++ b/qai_hub_models/models/quicksrnetlarge_quantized/export.py @@ -195,7 +195,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml b/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml index 93c2001d..897f7e5f 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml +++ b/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml @@ -13,13 +13,14 @@ research_paper: https://arxiv.org/abs/2303.04336 research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms' license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf -deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: - Model checkpoint: quicksrnet_large_4x_checkpoint_int8 - Input resolution: 128x128 - Number of parameters: 436K - Model size: 464 KB + Model checkpoint: quicksrnet_large_3x_checkpoint + Input resolution: 640x360 + Number of parameters: 424K + Model size: 449 KB applicable_scenarios: - Virtual Real Estate Tours - Gaming diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/model.py b/qai_hub_models/models/quicksrnetlarge_quantized/model.py index b1541f6d..4767a779 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/model.py +++ b/qai_hub_models/models/quicksrnetlarge_quantized/model.py @@ 
-18,6 +18,7 @@ from aimet_torch.model_preparer import prepare_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim +from qai_hub_models.models._shared.super_resolution.model import DEFAULT_SCALE_FACTOR from qai_hub_models.models.quicksrnetlarge.model import QuickSRNetLarge from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset @@ -26,7 +27,6 @@ MODEL_ASSET_VERSION = 3 DEFAULT_ENCODINGS = "quicksrnetlarge_quantized_encodings.json" -SCALING_FACTOR = 4 class QuickSRNetLargeQuantizable(AIMETQuantizableMixin, QuickSRNetLarge): @@ -36,16 +36,16 @@ class QuickSRNetLargeQuantizable(AIMETQuantizableMixin, QuickSRNetLarge): Support for quantizing using your own weights & data will come at a later date.""" def __init__( - self, - quicksrnet_model: QuantizationSimModel, + self, quicksrnet_model: QuantizationSimModel, scale_factor: int ) -> None: - QuickSRNetLarge.__init__(self, quicksrnet_model.model) + QuickSRNetLarge.__init__(self, quicksrnet_model.model, scale_factor) AIMETQuantizableMixin.__init__(self, quicksrnet_model) @classmethod def from_pretrained( cls, aimet_encodings: str | None = "DEFAULT", + scale_factor: int = DEFAULT_SCALE_FACTOR, ) -> "QuickSRNetLargeQuantizable": """ Parameters: @@ -55,7 +55,7 @@ def from_pretrained( else: Interprets as a filepath and loads the encodings stored there. """ # Load Model - fp16_model = QuickSRNetLarge.from_pretrained() + fp16_model = QuickSRNetLarge.from_pretrained(scale_factor) input_shape = cls.get_input_spec()["image"][0] model = prepare_model(fp16_model) equalize_model(model, input_shape) @@ -78,4 +78,4 @@ def from_pretrained( sim.model.eval() - return cls(sim) + return cls(sim, scale_factor) diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml b/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml index c42927e0..6fc4336f 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml +++ b/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: QuickSRNetLarge-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1335.0 - throughput: 749.0636704119851 + inference_time: 1324.0 + throughput: 755.2870090634441 estimated_peak_memory_range: - min: 20480 - max: 1445696 + min: 12288 + max: 2457016 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jvgdv4vlg + job_id: jvgd7zzrg job_status: Passed torchscript_onnx_qnn: - inference_time: 1153.0 - throughput: 867.3026886383348 + inference_time: 1159.0 + throughput: 862.8127696289905 estimated_peak_memory_range: - min: 16384 - max: 8430176 + min: 77824 + max: 3860912 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jegneq9mg + job_id: jz57v7795 job_status: Passed torchscript_onnx_ort: - inference_time: 1490.0 - throughput: 671.1409395973154 + inference_time: 1039.0 + throughput: 962.4639076034649 estimated_peak_memory_range: - min: 12288 - max: 72563040 + min: 69632 + max: 4717016 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 24 + layers_on_npu: 22 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 24 - job_id: j1p87r385 + total_layers: 22 + job_id: jegnr77q5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung 
chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.041532Z' + timestamp: '2024-06-11T11:56:37Z' - torchscript_onnx_tflite: - inference_time: 1013.0 - throughput: 987.1668311944719 + inference_time: 1024.0 + throughput: 976.5625 estimated_peak_memory_range: - min: 12288 - max: 24747808 + min: 49152 + max: 25834320 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jz57dnjr5 + job_id: jz5wm11mg job_status: Passed torchscript_onnx_qnn: - inference_time: 804.0 - throughput: 1243.7810945273632 + inference_time: 812.0 + throughput: 1231.527093596059 estimated_peak_memory_range: - min: 12288 - max: 19651520 + min: 7340032 + max: 27038272 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jopryd4eg + job_id: jqp4j991p job_status: Passed torchscript_onnx_ort: - inference_time: 1049.0 - throughput: 953.2888465204957 + inference_time: 776.0 + throughput: 1288.659793814433 estimated_peak_memory_range: - min: 0 - max: 15719840 + min: 36864 + max: 17135056 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 24 + layers_on_npu: 22 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 24 - job_id: jogky0lop + total_layers: 22 + job_id: jopr1nn7g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.041576Z' + timestamp: '2024-06-11T11:56:37Z' - torchscript_onnx_tflite: - inference_time: 1409.0 - throughput: 709.7232079488999 + inference_time: 1364.0 + throughput: 733.1378299120234 estimated_peak_memory_range: - min: 28672 - max: 1459792 + min: 16384 + max: 1375064 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jqp4w4xlg + job_id: jmg99xx8g job_status: Passed torchscript_onnx_qnn: - inference_time: 1161.0 - throughput: 861.3264427217915 + inference_time: 1156.0 + throughput: 865.0519031141869 estimated_peak_memory_range: - min: 28672 - max: 8085960 + min: 94208 + max: 9070680 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jqpyd244p + job_id: jo5mvdd95 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.041604Z' + timestamp: '2024-06-11T11:56:36Z' - torchscript_onnx_tflite: - inference_time: 3568.0 - throughput: 280.2690582959641 + inference_time: 3979.0 + throughput: 251.31942699170645 estimated_peak_memory_range: min: 12288 - max: 17966464 + max: 18592624 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: j0px1r79g - job_status: Passed - torchscript_onnx_qnn: - inference_time: 3190.0 - throughput: 313.47962382445144 - estimated_peak_memory_range: - min: 61440 - max: 18936608 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 19 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 19 - job_id: j2p0r91ep + job_id: jnp1qvv7g job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:42.041632Z' 
+ timestamp: '2024-06-11T11:56:31Z' - torchscript_onnx_tflite: - inference_time: 34339.0 - throughput: 29.121407146393313 + inference_time: 32895.0 + throughput: 30.399756801945585 estimated_peak_memory_range: - min: 3600384 - max: 5661144 + min: 4079616 + max: 6087016 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jo5mzkwqp + job_id: jvgd7zzzg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:42.041649Z' + timestamp: '2024-06-11T11:56:32Z' - torchscript_onnx_qnn: - inference_time: 1090.0 - throughput: 917.4311926605504 + inference_time: 1008.0 + throughput: 992.063492063492 estimated_peak_memory_range: - min: 49152 - max: 49152 + min: 90112 + max: 90112 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jep2md7m5 + job_id: j0pxeddl5 job_status: Passed torchscript_onnx_ort: - inference_time: 1440.0 - throughput: 694.4444444444445 + inference_time: 1090.0 + throughput: 917.4311926605504 estimated_peak_memory_range: - min: 8978432 - max: 8978432 + min: 4714496 + max: 4714496 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 24 + layers_on_npu: 22 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 24 - job_id: jn5q217m5 + total_layers: 22 + job_id: jep23vvqg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.041677Z' + timestamp: '2024-06-11T11:56:38Z' diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/test.py b/qai_hub_models/models/quicksrnetlarge_quantized/test.py index 16e59332..81430c06 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/test.py +++ b/qai_hub_models/models/quicksrnetlarge_quantized/test.py @@ -2,30 +2,23 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -import os -import zipfile import numpy as np import pytest import torch from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models.quicksrnetlarge_quantized.demo import IMAGE_ADDRESS +from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS +from qai_hub_models.models.quicksrnetlarge.model import MODEL_ASSET_VERSION, MODEL_ID from qai_hub_models.models.quicksrnetlarge_quantized.demo import main as demo_main from qai_hub_models.models.quicksrnetlarge_quantized.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, QuickSRNetLargeQuantizable, ) -from qai_hub_models.utils.asset_loaders import ( - CachedWebModelAsset, - load_image, - qaihm_temp_dir, -) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image from qai_hub_models.utils.testing import assert_most_close, skip_clone_repo_check OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetlarge_quantized_output.png" + MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetlarge_demo_output.png" ) @@ -67,26 +60,6 @@ def test_trace(): ) -@pytest.mark.skip("https://github.com/tetraai/tetracode/issues/9606") -@skip_clone_repo_check -def test_aimet_export(): - model = QuickSRNetLargeQuantizable.from_pretrained() - name = model.__class__.__name__ - with qaihm_temp_dir() as tmpdir: - output_zip = model.convert_to_onnx_and_aimet_encodings( - tmpdir, - ) - assert os.path.exists(output_zip) - with zipfile.ZipFile(output_zip, "r") as zip: - assert zip.namelist() == [ - f"{name}.aimet/", - f"{name}.aimet/{name}.onnx", - f"{name}.aimet/{name}.encodings", - ] - - # No test of torchscipt and aimet encodings due to #8954 - - @skip_clone_repo_check def test_demo(): demo_main(is_test=True) diff --git a/qai_hub_models/models/quicksrnetmedium/README.md b/qai_hub_models/models/quicksrnetmedium/README.md index cb5b80f1..0e95ef93 100644 --- a/qai_hub_models/models/quicksrnetmedium/README.md +++ b/qai_hub_models/models/quicksrnetmedium/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of QuickSRNetMedium can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms](https://arxiv.org/abs/2303.04336) diff --git a/qai_hub_models/models/quicksrnetmedium/demo.py b/qai_hub_models/models/quicksrnetmedium/demo.py index 51c1ffec..2d75fb92 100644 --- a/qai_hub_models/models/quicksrnetmedium/demo.py +++ b/qai_hub_models/models/quicksrnetmedium/demo.py @@ -3,16 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo -from qai_hub_models.models.quicksrnetmedium.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, - QuickSRNetMedium, -) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset - -IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetmedium_demo.jpg" -) +from qai_hub_models.models.quicksrnetmedium.model import MODEL_ID, QuickSRNetMedium # Run QuickSRNet end-to-end on a sample image. @@ -21,7 +12,6 @@ def main(is_test: bool = False): super_resolution_demo( model_cls=QuickSRNetMedium, model_id=MODEL_ID, - default_image=IMAGE_ADDRESS, is_test=is_test, ) diff --git a/qai_hub_models/models/quicksrnetmedium/export.py b/qai_hub_models/models/quicksrnetmedium/export.py index 32a7b7ba..20dca067 100644 --- a/qai_hub_models/models/quicksrnetmedium/export.py +++ b/qai_hub_models/models/quicksrnetmedium/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/quicksrnetmedium/info.yaml b/qai_hub_models/models/quicksrnetmedium/info.yaml index 72ae05be..9f0a95c6 100644 --- a/qai_hub_models/models/quicksrnetmedium/info.yaml +++ b/qai_hub_models/models/quicksrnetmedium/info.yaml @@ -12,13 +12,14 @@ research_paper: https://arxiv.org/abs/2303.04336 research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms' license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf -deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: - Model checkpoint: quicksrnet_medium_4x_checkpoint_float32 - Input resolution: 128x128 - Number of parameters: 61.0K - Model size: 244 KB + Model checkpoint: quicksrnet_medium_3x_checkpoint + Input resolution: 640x360 + Number of parameters: 55.0K + Model size: 220 KB applicable_scenarios: - Virtual Real Estate Tours - Gaming diff --git a/qai_hub_models/models/quicksrnetmedium/model.py b/qai_hub_models/models/quicksrnetmedium/model.py index abb5817a..65c91c46 100644 --- a/qai_hub_models/models/quicksrnetmedium/model.py +++ 
b/qai_hub_models/models/quicksrnetmedium/model.py @@ -4,86 +4,48 @@ # --------------------------------------------------------------------- from __future__ import annotations -import torch +from pathlib import Path -from qai_hub_models.evaluators.base_evaluators import BaseEvaluator -from qai_hub_models.evaluators.superres_evaluator import SuperResolutionOutputEvaluator from qai_hub_models.models._shared.quicksrnet.common import ( _load_quicksrnet_source_model, ) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import BaseModel -from qai_hub_models.utils.input_spec import InputSpec +from qai_hub_models.models._shared.super_resolution.model import ( + DEFAULT_SCALE_FACTOR, + SuperResolutionModel, + validate_scale_factor, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_torch MODEL_ID = __name__.split(".")[-2] MODEL_ASSET_VERSION = 2 -# Weights and config stored in S3 are sourced from -# https://github.com/quic/aimet-model-zoo/blob/develop/aimet_zoo_torch/quicksrnet/model/model_cards/quicksrnet_medium_4x_w8a8.json -# https://github.com/quic/aimet-model-zoo/releases/download/phase_2_january_artifacts/quicksrnet_medium_4x_checkpoint_float32.pth.tar -QUICKSRNET_WEIGHTS = "quicksrnet_medium_4x_checkpoint_float32.pth.tar" -SCALING_FACTOR = 4 +BASE_ASSET_URL = "https://github.com/quic/aimet-model-zoo/releases/download/phase_2_january_artifacts/quicksrnet_medium_{scale_factor}x_checkpoint_float32.pth.tar" NUM_CHANNELS = 32 NUM_INTERMEDIATE_LAYERS = 5 -USE_ITO_CONNECTION = False - -class QuickSRNetMedium(BaseModel): - """Exportable QuickSRNet-Medium upscaler, end-to-end.""" - def __init__( - self, - quicksrnet_model: torch.nn.Module, - ) -> None: - super().__init__() - self.relu = torch.nn.ReLU() - self.model = quicksrnet_model +class QuickSRNetMedium(SuperResolutionModel): + """Exportable QuickSRNetMedium super resolution model, end-to-end.""" @classmethod - def from_pretrained(cls) -> QuickSRNetMedium: + def from_pretrained( + cls, scale_factor: int = DEFAULT_SCALE_FACTOR + ) -> QuickSRNetMedium: + validate_scale_factor(scale_factor) model = _load_quicksrnet_source_model( - SCALING_FACTOR, + scale_factor, NUM_CHANNELS, NUM_INTERMEDIATE_LAYERS, - USE_ITO_CONNECTION, + use_ito_connection=False, ) - dst = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, QUICKSRNET_WEIGHTS - ).fetch() - checkpoint = torch.load(dst, map_location=torch.device("cpu")) + url = BASE_ASSET_URL.format(scale_factor=scale_factor) + checkpoint_asset = CachedWebModelAsset( + url, + MODEL_ID, + MODEL_ASSET_VERSION, + Path(url).name, + ) + checkpoint = load_torch(checkpoint_asset) model.load_state_dict(checkpoint["state_dict"]) model.eval() - return cls(model) - - def get_evaluator(self) -> BaseEvaluator: - return SuperResolutionOutputEvaluator() - - def forward(self, image): - """ - Run QuickSRNet-Medium on `image`, and produce an upscaled image - - Parameters: - image: Pixel values pre-processed for model consumption. - Range: float[0, 1] - 3-channel Color Space: RGB - - Returns: - image: Pixel values - Range: float[0, 1] - 3-channel Color Space: RGB - """ - # image = self.relu(image) - return self.model(image) - - @staticmethod - def get_input_spec( - batch_size: int = 1, - num_channels: int = 3, - height: int = 128, - width: int = 128, - ) -> InputSpec: - # Get the input specification ordered (name -> (shape, type)) pairs for this model. 
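Note on the model.py rewrite above: the per-variant `BaseModel` boilerplate is replaced by a shared `SuperResolutionModel` base plus a templated checkpoint URL, so the scale factor becomes a constructor argument instead of a hard-coded constant. The shared module itself is not part of this diff, so the following is only a rough sketch of what the refactor relies on; the names `DEFAULT_SCALE_FACTOR`, `validate_scale_factor`, and the `(model, scale_factor)` constructor are taken from how they are used above, while the default value, the supported-factor set, and the class body are assumptions.

```python
# Illustrative sketch only; the real definitions live in
# qai_hub_models/models/_shared/super_resolution/model.py, which this diff does not show.
import torch

DEFAULT_SCALE_FACTOR = 3  # assumed: info.yaml in this change lists the 3x checkpoints
SUPPORTED_SCALE_FACTORS = (2, 3, 4)  # assumed set of published QuickSRNet checkpoints


def validate_scale_factor(scale_factor: int) -> None:
    """Fail fast when no published checkpoint exists for the requested factor (assumed behavior)."""
    if scale_factor not in SUPPORTED_SCALE_FACTORS:
        raise ValueError(
            f"Unsupported scale factor {scale_factor}; expected one of {SUPPORTED_SCALE_FACTORS}."
        )


class SuperResolutionModel(torch.nn.Module):
    """Minimal stand-in for the shared base class the QuickSRNet variants now extend."""

    def __init__(self, sr_model: torch.nn.Module, scale_factor: int) -> None:
        super().__init__()
        self.model = sr_model
        self.scale_factor = scale_factor

    def forward(self, image: torch.Tensor) -> torch.Tensor:
        # image: float32 RGB in [0, 1], shape (N, 3, H, W); the output spatial size
        # is the input size multiplied by self.scale_factor.
        return self.model(image)
```

With the 3x checkpoint and the 640x360 input resolution now listed in info.yaml, the upscaled output is 1920x1080.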
- # - # This can be used with the qai_hub python API to declare - # the model input specification upon submitting a profile job. - return {"image": ((batch_size, num_channels, height, width), "float32")} + return cls(model, scale_factor) diff --git a/qai_hub_models/models/quicksrnetmedium/perf.yaml b/qai_hub_models/models/quicksrnetmedium/perf.yaml index 111b4ab6..a70a9366 100644 --- a/qai_hub_models/models/quicksrnetmedium/perf.yaml +++ b/qai_hub_models/models/quicksrnetmedium/perf.yaml @@ -36,11 +36,11 @@ models: - name: QuickSRNetMedium performance_metrics: - torchscript_onnx_tflite: - inference_time: 1387.0 - throughput: 720.9805335255949 + inference_time: 1343.0 + throughput: 744.6016381236038 estimated_peak_memory_range: - min: 24576 - max: 1439976 + min: 16384 + max: 1439320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jw561m37p + job_id: j2p0evvn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 983.0 - throughput: 1017.293997965412 + inference_time: 988.0 + throughput: 1012.1457489878543 estimated_peak_memory_range: - min: 16384 - max: 9759576 + min: 12288 + max: 2409584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j1pvwm1mg + job_id: jw56qdwyg job_status: Passed torchscript_onnx_ort: - inference_time: 1583.0 - throughput: 631.7119393556538 + inference_time: 1506.0 + throughput: 664.0106241699867 estimated_peak_memory_range: - min: 212992 - max: 56649320 + min: 217088 + max: 3451560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jz5w9zdjp + job_id: jygzv4nxp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.084964Z' + timestamp: '2024-06-11T11:57:01Z' - torchscript_onnx_tflite: - inference_time: 979.0 - throughput: 1021.4504596527069 + inference_time: 898.0 + throughput: 1113.5857461024498 estimated_peak_memory_range: - min: 16384 - max: 20039200 + min: 20480 + max: 20940320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: j1p3m74zg + job_id: j1p8w44op job_status: Passed torchscript_onnx_qnn: - inference_time: 653.0 - throughput: 1531.3935681470139 + inference_time: 645.0 + throughput: 1550.3875968992247 estimated_peak_memory_range: - min: 233472 - max: 17578544 + min: 208896 + max: 17163888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j7gjly08p + job_id: jwgoe48kp job_status: Passed torchscript_onnx_ort: - inference_time: 1037.0 - throughput: 964.3201542912246 + inference_time: 1070.0 + throughput: 934.5794392523364 estimated_peak_memory_range: - min: 0 - max: 13337296 + min: 212992 + max: 13764384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jmg9423v5 + job_id: jz5wm14mg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.085007Z' + timestamp: '2024-06-11T11:57:02Z' - torchscript_onnx_tflite: - 
inference_time: 1419.0 - throughput: 704.7216349541931 + inference_time: 1369.0 + throughput: 730.4601899196493 estimated_peak_memory_range: - min: 32768 - max: 8332616 + min: 24576 + max: 1342320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jwgovw1d5 + job_id: jn5q9mmop job_status: Passed torchscript_onnx_qnn: - inference_time: 991.0 - throughput: 1009.0817356205853 + inference_time: 1010.0 + throughput: 990.0990099009902 estimated_peak_memory_range: - min: 212992 - max: 65514992 + min: 221184 + max: 7892152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jygz7yx6p + job_id: jlpe4lyv5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.085033Z' + timestamp: '2024-06-11T11:57:00Z' - torchscript_onnx_qnn: - inference_time: 1228.0 - throughput: 814.3322475570033 + inference_time: 1066.0 + throughput: 938.0863039399625 estimated_peak_memory_range: - min: 237568 - max: 237568 + min: 204800 + max: 204800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jlpevxr05 + job_id: j7gjkwqe5 job_status: Passed torchscript_onnx_ort: - inference_time: 1529.0 - throughput: 654.0222367560497 + inference_time: 1498.0 + throughput: 667.5567423230974 estimated_peak_memory_range: - min: 8851456 - max: 8851456 + min: 9003008 + max: 9003008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jnp181dlg + job_id: jmg99xd8g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.085060Z' + timestamp: '2024-06-11T11:57:03Z' diff --git a/qai_hub_models/models/quicksrnetmedium/test.py b/qai_hub_models/models/quicksrnetmedium/test.py index 9cd04d8e..4c3129a0 100644 --- a/qai_hub_models/models/quicksrnetmedium/test.py +++ b/qai_hub_models/models/quicksrnetmedium/test.py @@ -5,7 +5,7 @@ import numpy as np from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models.quicksrnetmedium.demo import IMAGE_ADDRESS +from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS from qai_hub_models.models.quicksrnetmedium.demo import main as demo_main from qai_hub_models.models.quicksrnetmedium.model import ( MODEL_ASSET_VERSION, diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/README.md b/qai_hub_models/models/quicksrnetmedium_quantized/README.md index 83ebe05d..2ffc7d9f 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/README.md +++ b/qai_hub_models/models/quicksrnetmedium_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of QuickSRNetMedium-Quantized can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms](https://arxiv.org/abs/2303.04336) diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/demo.py b/qai_hub_models/models/quicksrnetmedium_quantized/demo.py index f45370ab..4d488e7e 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/demo.py +++ b/qai_hub_models/models/quicksrnetmedium_quantized/demo.py @@ -4,22 +4,15 @@ # --------------------------------------------------------------------- from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo from qai_hub_models.models.quicksrnetmedium_quantized.model import ( - MODEL_ASSET_VERSION, MODEL_ID, QuickSRNetMediumQuantizable, ) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset - -IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetmedium_demo.jpg" -) def main(is_test: bool = False): super_resolution_demo( QuickSRNetMediumQuantizable, MODEL_ID, - default_image=IMAGE_ADDRESS, is_test=is_test, ) diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/export.py b/qai_hub_models/models/quicksrnetmedium_quantized/export.py index 18909474..fa37875f 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/export.py +++ b/qai_hub_models/models/quicksrnetmedium_quantized/export.py @@ -195,7 +195,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml b/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml index 070615b3..e17071f4 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml +++ b/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml @@ -13,13 +13,14 @@ research_paper: https://arxiv.org/abs/2303.04336 research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms' license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf -deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: - Model checkpoint: quicksrnet_medium_4x_checkpoint_int8 - Input resolution: 128x128 - Number of parameters: 61.0K - Model size: 244 KB + Model checkpoint: quicksrnet_medium_3x_checkpoint + Input resolution: 640x360 + Number of parameters: 55.0K + Model size: 67.2 KB applicable_scenarios: - Virtual Real Estate Tours - Gaming diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/model.py b/qai_hub_models/models/quicksrnetmedium_quantized/model.py index 1c17a3dc..e16d87c5 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/model.py +++ 
b/qai_hub_models/models/quicksrnetmedium_quantized/model.py @@ -12,12 +12,12 @@ ) # isort: on - import torch from aimet_torch.cross_layer_equalization import equalize_model from aimet_torch.model_preparer import prepare_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim +from qai_hub_models.models._shared.super_resolution.model import DEFAULT_SCALE_FACTOR from qai_hub_models.models.quicksrnetmedium.model import QuickSRNetMedium from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset @@ -26,7 +26,6 @@ MODEL_ASSET_VERSION = 4 DEFAULT_ENCODINGS = "quicksrnetmedium_quantized_encodings.json" -SCALING_FACTOR = 4 class QuickSRNetMediumQuantizable(AIMETQuantizableMixin, QuickSRNetMedium): @@ -37,14 +36,16 @@ class QuickSRNetMediumQuantizable(AIMETQuantizableMixin, QuickSRNetMedium): def __init__( self, quicksrnet_model: QuantizationSimModel, + scale_factor: int, ) -> None: - QuickSRNetMedium.__init__(self, quicksrnet_model.model) + QuickSRNetMedium.__init__(self, quicksrnet_model.model, scale_factor) AIMETQuantizableMixin.__init__(self, quicksrnet_model) @classmethod def from_pretrained( cls, aimet_encodings: str | None = "DEFAULT", + scale_factor: int = DEFAULT_SCALE_FACTOR, ) -> "QuickSRNetMediumQuantizable": """ Parameters: @@ -54,7 +55,7 @@ def from_pretrained( else: Interprets as a filepath and loads the encodings stored there. """ # Load Model - fp16_model = QuickSRNetMedium.from_pretrained() + fp16_model = QuickSRNetMedium.from_pretrained(scale_factor) input_shape = cls.get_input_spec()["image"][0] model = prepare_model(fp16_model) equalize_model(model, input_shape) @@ -77,4 +78,4 @@ def from_pretrained( sim.model.eval() - return cls(sim) + return cls(sim, scale_factor) diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml b/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml index f1d881d8..72df875e 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml +++ b/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: QuickSRNetMedium-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1042.0 - throughput: 959.6928982725528 + inference_time: 1000.0 + throughput: 1000.0 estimated_peak_memory_range: - min: 24576 - max: 1638272 + min: 12288 + max: 5493824 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jz57dnvr5 + job_id: jvgd7z2zg job_status: Passed torchscript_onnx_qnn: - inference_time: 801.0 - throughput: 1248.4394506866417 + inference_time: 803.0 + throughput: 1245.3300124533 estimated_peak_memory_range: min: 16384 - max: 10253480 + max: 10291792 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jopryd1eg + job_id: jegnr7kq5 job_status: Passed torchscript_onnx_ort: - inference_time: 1171.0 - throughput: 853.9709649871904 + inference_time: 757.0 + throughput: 1321.003963011889 estimated_peak_memory_range: - min: 212992 - max: 24635072 + min: 65536 + max: 19746264 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 16 + layers_on_npu: 14 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 16 - job_id: jogky0rop + total_layers: 14 + job_id: j2p0ev6n5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android 
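The quantizable wrapper above now threads `scale_factor` from `from_pretrained` through to the float model and the constructor. A minimal usage sketch based only on the signatures visible in this hunk (whether checkpoints and encodings exist for non-default factors is not established by this diff):

```python
# Requires aimet_torch, which the module imports at the top of model.py.
from qai_hub_models.models.quicksrnetmedium_quantized.model import (
    QuickSRNetMediumQuantizable,
)

# Default: prepare the float model, equalize it, wrap it in a QuantizationSimModel,
# and load the pre-computed AIMET encodings shipped with the model.
model = QuickSRNetMediumQuantizable.from_pretrained()

# Per the docstring, aimet_encodings can also point at a local encodings file
# instead of the shipped DEFAULT set.
model_local = QuickSRNetMediumQuantizable.from_pretrained(aimet_encodings="my_encodings.json")

# Request a different upscaling ratio; only valid if a matching float checkpoint
# (and, for good accuracy, matching encodings) is actually available.
model_2x = QuickSRNetMediumQuantizable.from_pretrained(scale_factor=2)
```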
manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.119455Z' + timestamp: '2024-06-11T11:57:28Z' - torchscript_onnx_tflite: - inference_time: 812.0 - throughput: 1231.527093596059 + inference_time: 814.0 + throughput: 1228.5012285012285 estimated_peak_memory_range: min: 12288 - max: 19349424 + max: 20707552 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jqp4w4jlg + job_id: jz57v7995 job_status: Passed torchscript_onnx_qnn: - inference_time: 540.0 - throughput: 1851.851851851852 + inference_time: 546.0 + throughput: 1831.5018315018315 estimated_peak_memory_range: min: 65536 - max: 14246880 + max: 14574352 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jep2md3m5 + job_id: jopr1nw7g job_status: Passed torchscript_onnx_ort: - inference_time: 853.0 - throughput: 1172.3329425556858 + inference_time: 558.0 + throughput: 1792.1146953405018 estimated_peak_memory_range: - min: 212992 - max: 14121760 + min: 65536 + max: 12140448 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 16 + layers_on_npu: 14 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 16 - job_id: jn5q219m5 + total_layers: 14 + job_id: j1p8w41op job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.119496Z' + timestamp: '2024-06-11T11:57:29Z' - torchscript_onnx_tflite: - inference_time: 1898.0 - throughput: 526.8703898840885 + inference_time: 995.0 + throughput: 1005.0251256281407 estimated_peak_memory_range: - min: 32768 - max: 2871560 + min: 24576 + max: 3118760 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: j0px1re9g + job_id: jqp4j931p job_status: Passed torchscript_onnx_qnn: - inference_time: 819.0 - throughput: 1221.001221001221 + inference_time: 800.0 + throughput: 1250.0 estimated_peak_memory_range: - min: 65536 - max: 68766280 + min: 16384 + max: 18363240 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: j2p0r9eep + job_id: jqpyv7mlp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.119523Z' + timestamp: '2024-06-11T11:57:27Z' - torchscript_onnx_tflite: - inference_time: 2862.0 - throughput: 349.4060097833683 + inference_time: 1968.0 + throughput: 508.130081300813 estimated_peak_memory_range: - min: 16384 - max: 14394944 + min: 12288 + max: 14747456 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jo5mzkvqp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1349.0 - throughput: 741.2898443291327 - estimated_peak_memory_range: - min: 61440 - max: 14437104 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 11 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 11 - job_id: j1p87rw85 + job_id: j0pxedxl5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: 
'2024-05-29T18:59:42.119548Z' + timestamp: '2024-06-11T11:57:22Z' - torchscript_onnx_tflite: - inference_time: 8787.0 - throughput: 113.80448389666553 + inference_time: 9155.0 + throughput: 109.22992900054615 estimated_peak_memory_range: - min: 3321856 - max: 10915320 + min: 3342336 + max: 7015776 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jegneqrmg + job_id: jo5mvd895 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:42.119564Z' + timestamp: '2024-06-11T11:57:23Z' - torchscript_onnx_qnn: - inference_time: 750.0 - throughput: 1333.3333333333333 + inference_time: 764.0 + throughput: 1308.9005235602094 estimated_peak_memory_range: - min: 49152 - max: 49152 + min: 1196032 + max: 1196032 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jqpyd2v4p + job_id: jep23veqg job_status: Passed torchscript_onnx_ort: - inference_time: 1208.0 - throughput: 827.8145695364238 + inference_time: 781.0 + throughput: 1280.4097311139565 estimated_peak_memory_range: - min: 8826880 - max: 8826880 + min: 7262208 + max: 7262208 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 16 + layers_on_npu: 14 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 16 - job_id: j1glk8elp + total_layers: 14 + job_id: jogkr98n5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.119590Z' + timestamp: '2024-06-11T11:57:30Z' diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/test.py b/qai_hub_models/models/quicksrnetmedium_quantized/test.py index c8c6ea58..6f29e238 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/test.py +++ b/qai_hub_models/models/quicksrnetmedium_quantized/test.py @@ -2,30 +2,23 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
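A note for reading the regenerated perf.yaml blocks in this change: `inference_time` is reported in microseconds and `throughput` in inferences per second, so the two fields are reciprocals scaled by 10^6. For example, the Galaxy S23 QNN entry above records `inference_time: 803.0` and `throughput: 1245.3300124533`:

```python
# Sanity check of the inference_time / throughput relationship in perf.yaml
# (inference_time is in microseconds).
inference_time_us = 803.0
throughput_per_s = 1_000_000 / inference_time_us
print(throughput_per_s)  # ~1245.33, matching the recorded throughput: 1245.3300124533
```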
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -import os -import zipfile import numpy as np import pytest import torch from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models.quicksrnetmedium_quantized.demo import IMAGE_ADDRESS +from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS +from qai_hub_models.models.quicksrnetmedium.model import MODEL_ASSET_VERSION, MODEL_ID from qai_hub_models.models.quicksrnetmedium_quantized.demo import main as demo_main from qai_hub_models.models.quicksrnetmedium_quantized.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, QuickSRNetMediumQuantizable, ) -from qai_hub_models.utils.asset_loaders import ( - CachedWebModelAsset, - load_image, - qaihm_temp_dir, -) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image from qai_hub_models.utils.testing import assert_most_close, skip_clone_repo_check OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetmedium_quantized_output.png" + MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetmedium_demo_output.png" ) @@ -67,24 +60,6 @@ def test_trace(): ) -@pytest.mark.skip("https://github.com/tetraai/tetracode/issues/9606") -@skip_clone_repo_check -def test_aimet_export(): - model = QuickSRNetMediumQuantizable.from_pretrained() - name = model.__class__.__name__ - with qaihm_temp_dir() as tmpdir: - output_zip = model.convert_to_onnx_and_aimet_encodings( - tmpdir, - ) - assert os.path.exists(output_zip) - with zipfile.ZipFile(output_zip, "r") as zip: - assert zip.namelist() == [ - f"{name}.aimet/", - f"{name}.aimet/{name}.onnx", - f"{name}.aimet/{name}.encodings", - ] - - @skip_clone_repo_check def test_demo(): demo_main(is_test=True) diff --git a/qai_hub_models/models/quicksrnetsmall/README.md b/qai_hub_models/models/quicksrnetsmall/README.md index 665e005e..ada2e6c6 100644 --- a/qai_hub_models/models/quicksrnetsmall/README.md +++ b/qai_hub_models/models/quicksrnetsmall/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of QuickSRNetSmall can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms](https://arxiv.org/abs/2303.04336) diff --git a/qai_hub_models/models/quicksrnetsmall/demo.py b/qai_hub_models/models/quicksrnetsmall/demo.py index dd0e6c43..5179a61a 100644 --- a/qai_hub_models/models/quicksrnetsmall/demo.py +++ b/qai_hub_models/models/quicksrnetsmall/demo.py @@ -3,16 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo -from qai_hub_models.models.quicksrnetsmall.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, - QuickSRNetSmall, -) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset - -IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetsmall_demo.jpg" -) +from qai_hub_models.models.quicksrnetsmall.model import MODEL_ID, QuickSRNetSmall # Run QuickSRNet end-to-end on a sample image. @@ -21,7 +12,6 @@ def main(is_test: bool = False): super_resolution_demo( model_cls=QuickSRNetSmall, model_id=MODEL_ID, - default_image=IMAGE_ADDRESS, is_test=is_test, ) diff --git a/qai_hub_models/models/quicksrnetsmall/export.py b/qai_hub_models/models/quicksrnetsmall/export.py index 4d78a4de..7bf13e43 100644 --- a/qai_hub_models/models/quicksrnetsmall/export.py +++ b/qai_hub_models/models/quicksrnetsmall/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/quicksrnetsmall/info.yaml b/qai_hub_models/models/quicksrnetsmall/info.yaml index 128750f2..94d58142 100644 --- a/qai_hub_models/models/quicksrnetsmall/info.yaml +++ b/qai_hub_models/models/quicksrnetsmall/info.yaml @@ -12,13 +12,14 @@ research_paper: https://arxiv.org/abs/2303.04336 research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms' license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf -deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: - Model checkpoint: quicksrnet_small_4x_checkpoint_float32 - Input resolution: 128x128 - Number of parameters: 76.0M - Model size: 290 MB + Model checkpoint: quicksrnet_small_3x_checkpoint + Input resolution: 640x360 + Number of parameters: 27.2K + Model size: 110 KB applicable_scenarios: - Virtual Real Estate Tours - Gaming diff --git a/qai_hub_models/models/quicksrnetsmall/model.py b/qai_hub_models/models/quicksrnetsmall/model.py index f1eb380f..9b21851e 100644 --- a/qai_hub_models/models/quicksrnetsmall/model.py +++ 
b/qai_hub_models/models/quicksrnetsmall/model.py @@ -4,85 +4,48 @@ # --------------------------------------------------------------------- from __future__ import annotations -import torch +from pathlib import Path -from qai_hub_models.evaluators.base_evaluators import BaseEvaluator -from qai_hub_models.evaluators.superres_evaluator import SuperResolutionOutputEvaluator from qai_hub_models.models._shared.quicksrnet.common import ( _load_quicksrnet_source_model, ) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import BaseModel -from qai_hub_models.utils.input_spec import InputSpec +from qai_hub_models.models._shared.super_resolution.model import ( + DEFAULT_SCALE_FACTOR, + SuperResolutionModel, + validate_scale_factor, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_torch MODEL_ID = __name__.split(".")[-2] MODEL_ASSET_VERSION = 2 -# Weights and config stored in S3 are sourced from -# https://github.com/quic/aimet-model-zoo/blob/develop/aimet_zoo_torch/quicksrnet/model/model_cards/quicksrnet_small_4x_w8a8.json -# https://github.com/quic/aimet-model-zoo/releases/download/phase_2_january_artifacts/quicksrnet_small_4x_checkpoint_float32.pth.tar -QUICKSRNET_WEIGHTS = "quicksrnet_small_4x_checkpoint_float32.pth.tar" -SCALING_FACTOR = 4 +BASE_ASSET_URL = "https://github.com/quic/aimet-model-zoo/releases/download/phase_2_january_artifacts/quicksrnet_small_{scale_factor}x_checkpoint_float32.pth.tar" NUM_CHANNELS = 32 NUM_INTERMEDIATE_LAYERS = 2 -USE_ITO_CONNECTION = False - -class QuickSRNetSmall(BaseModel): - """Exportable QuickSRNet-Small upscaler, end-to-end.""" - def __init__( - self, - quicksrnet_model: torch.nn.Module, - ) -> None: - super().__init__() - self.model = quicksrnet_model +class QuickSRNetSmall(SuperResolutionModel): + """Exportable QuickSRNetSmall super resolution model, end-to-end.""" @classmethod - def from_pretrained(cls) -> QuickSRNetSmall: + def from_pretrained( + cls, scale_factor: int = DEFAULT_SCALE_FACTOR + ) -> QuickSRNetSmall: + validate_scale_factor(scale_factor) model = _load_quicksrnet_source_model( - SCALING_FACTOR, + scale_factor, NUM_CHANNELS, NUM_INTERMEDIATE_LAYERS, - USE_ITO_CONNECTION, + use_ito_connection=False, + ) + url = BASE_ASSET_URL.format(scale_factor=scale_factor) + checkpoint_asset = CachedWebModelAsset( + url, + MODEL_ID, + MODEL_ASSET_VERSION, + Path(url).name, ) - dst = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, QUICKSRNET_WEIGHTS - ).fetch() - checkpoint = torch.load(dst, map_location=torch.device("cpu")) + checkpoint = load_torch(checkpoint_asset) model.load_state_dict(checkpoint["state_dict"]) model.eval() - return cls(model) - - def get_evaluator(self) -> BaseEvaluator: - return SuperResolutionOutputEvaluator() - - def forward(self, image): - """ - Run QuickSRNet-Small on `image`, and produce an upscaled image - - Parameters: - image: Pixel values pre-processed for model consumption. - Range: float[0, 1] - 3-channel Color Space: RGB - - Returns: - image: Pixel values - Range: float[0, 1] - 3-channel Color Space: RGB - """ - - return self.model(image) - - @staticmethod - def get_input_spec( - batch_size: int = 1, - num_channels: int = 3, - height: int = 128, - width: int = 128, - ) -> InputSpec: - # Get the input specification ordered (name -> (shape, type)) pairs for this model. - # - # This can be used with the qai_hub python API to declare - # the model input specification upon submitting a profile job. 
- return {"image": ((batch_size, num_channels, height, width), "float32")} + return cls(model, scale_factor) diff --git a/qai_hub_models/models/quicksrnetsmall/perf.yaml b/qai_hub_models/models/quicksrnetsmall/perf.yaml index 39e979d9..16c8950f 100644 --- a/qai_hub_models/models/quicksrnetsmall/perf.yaml +++ b/qai_hub_models/models/quicksrnetsmall/perf.yaml @@ -36,11 +36,11 @@ models: - name: QuickSRNetSmall performance_metrics: - torchscript_onnx_tflite: - inference_time: 1375.0 - throughput: 727.2727272727273 + inference_time: 1334.0 + throughput: 749.6251874062968 estimated_peak_memory_range: - min: 24576 - max: 1884800 + min: 28672 + max: 1646912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: j1p3m7qzg + job_id: j1gle1xmp job_status: Passed torchscript_onnx_qnn: - inference_time: 998.0 - throughput: 1002.0040080160321 + inference_time: 1004.0 + throughput: 996.01593625498 estimated_peak_memory_range: - min: 212992 - max: 57987368 + min: 221184 + max: 10711856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: j7gjlyk8p + job_id: jwgoe4rkp job_status: Passed torchscript_onnx_ort: - inference_time: 1424.0 - throughput: 702.2471910112359 + inference_time: 1405.0 + throughput: 711.7437722419929 estimated_peak_memory_range: - min: 12288 - max: 67552416 + min: 212992 + max: 2559280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 13 - job_id: jmg9429v5 + job_id: jygzv4mxp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.162704Z' + timestamp: '2024-06-11T11:57:52Z' - torchscript_onnx_tflite: - inference_time: 896.0 - throughput: 1116.0714285714287 + inference_time: 936.0 + throughput: 1068.3760683760684 estimated_peak_memory_range: min: 16384 - max: 18791088 + max: 19633600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jwgovwed5 + job_id: jw56qd7yg job_status: Passed torchscript_onnx_qnn: - inference_time: 627.0 - throughput: 1594.896331738437 + inference_time: 624.0 + throughput: 1602.5641025641025 estimated_peak_memory_range: - min: 0 - max: 13197104 + min: 208896 + max: 13403568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jlpevx405 + job_id: j1pvz9drg job_status: Passed torchscript_onnx_ort: - inference_time: 944.0 - throughput: 1059.322033898305 + inference_time: 949.0 + throughput: 1053.740779768177 estimated_peak_memory_range: min: 212992 - max: 12361728 + max: 12509200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 13 - job_id: jnp181qlg + job_id: jz5wm17mg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.162746Z' + timestamp: '2024-06-11T11:57:53Z' - torchscript_onnx_tflite: - inference_time: 1357.0 - throughput: 736.9196757553427 + inference_time: 1319.0 + throughput: 758.1501137225171 estimated_peak_memory_range: - min: 28672 - max: 8270448 + min: 
20480 + max: 7876136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: j1pvwmzmg + job_id: j1p3qw9n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1019.0 - throughput: 981.3542688910696 + inference_time: 992.0 + throughput: 1008.0645161290323 estimated_peak_memory_range: - min: 221184 - max: 8704880 + min: 229376 + max: 12485448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jz5w9zmjp + job_id: jlpe4lzv5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.162771Z' + timestamp: '2024-06-11T11:57:51Z' - torchscript_onnx_qnn: - inference_time: 1147.0 - throughput: 871.8395815170009 + inference_time: 1112.0 + throughput: 899.2805755395683 estimated_peak_memory_range: - min: 204800 - max: 204800 + min: 221184 + max: 221184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jygz7yv6p + job_id: j7gjkw7e5 job_status: Passed torchscript_onnx_ort: - inference_time: 1432.0 - throughput: 698.3240223463687 + inference_time: 1419.0 + throughput: 704.7216349541931 estimated_peak_memory_range: - min: 9011200 - max: 9011200 + min: 8966144 + max: 8966144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 13 - job_id: jvgdv47lg + job_id: jmg99xm8g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.162795Z' + timestamp: '2024-06-11T11:57:53Z' diff --git a/qai_hub_models/models/quicksrnetsmall/test.py b/qai_hub_models/models/quicksrnetsmall/test.py index 87ccaee0..3e28e7d2 100644 --- a/qai_hub_models/models/quicksrnetsmall/test.py +++ b/qai_hub_models/models/quicksrnetsmall/test.py @@ -5,7 +5,7 @@ import numpy as np from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models.quicksrnetsmall.demo import IMAGE_ADDRESS +from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS from qai_hub_models.models.quicksrnetsmall.demo import main as demo_main from qai_hub_models.models.quicksrnetsmall.model import ( MODEL_ASSET_VERSION, diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/README.md b/qai_hub_models/models/quicksrnetsmall_quantized/README.md index 7380e967..8573495f 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/README.md +++ b/qai_hub_models/models/quicksrnetsmall_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of QuickSRNetSmall-Quantized can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms](https://arxiv.org/abs/2303.04336) diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/demo.py b/qai_hub_models/models/quicksrnetsmall_quantized/demo.py index cb2dcd45..8b411c0b 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/demo.py +++ b/qai_hub_models/models/quicksrnetsmall_quantized/demo.py @@ -4,22 +4,15 @@ # --------------------------------------------------------------------- from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo from qai_hub_models.models.quicksrnetsmall_quantized.model import ( - MODEL_ASSET_VERSION, MODEL_ID, QuickSRNetSmallQuantizable, ) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset - -IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnet_demo.jpg" -) def main(is_test: bool = False): super_resolution_demo( QuickSRNetSmallQuantizable, MODEL_ID, - default_image=IMAGE_ADDRESS, is_test=is_test, ) diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/export.py b/qai_hub_models/models/quicksrnetsmall_quantized/export.py index 69aec297..48cf6baf 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/export.py +++ b/qai_hub_models/models/quicksrnetsmall_quantized/export.py @@ -195,7 +195,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml b/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml index fad05b98..37b32e7f 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml +++ b/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml @@ -13,13 +13,14 @@ research_paper: https://arxiv.org/abs/2303.04336 research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms' license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf -deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: - Model checkpoint: quicksrnet_small_4x_checkpoint_int8 - Input resolution: 128x128 - Number of parameters: 33.3K - Model size: 42.5 KB + Model checkpoint: quicksrnet_small_3x_checkpoint + Input resolution: 640x360 + Number of parameters: 27.2K + Model size: 34.9 KB applicable_scenarios: - Virtual Real Estate Tours - Gaming diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/model.py b/qai_hub_models/models/quicksrnetsmall_quantized/model.py index 57c495a8..9b1c83a6 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/model.py +++ b/qai_hub_models/models/quicksrnetsmall_quantized/model.py 
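Each regenerated export.py in this change widens the ONNX branch of the artifact-extension switch so the new `PRECOMPILED_ORT` target also produces a `.onnx` file. Below is a standalone sketch of that selection logic; the `TargetRuntime` enum here is a stand-in (only the members named in the hunks are real, and the condition guarding the `.so` branch is assumed to be the QNN runtime since it sits outside the visible context lines).

```python
from enum import Enum, auto


class TargetRuntime(Enum):
    # Stand-in for the qai_hub_models TargetRuntime enum; member names mirror
    # the ones referenced in the export.py hunks, values are illustrative.
    QNN = auto()
    TFLITE = auto()
    ORT = auto()
    PRECOMPILED_ORT = auto()


def target_runtime_extension(target_runtime: TargetRuntime) -> str:
    """Pick the downloaded artifact's file extension for a target runtime."""
    if target_runtime == TargetRuntime.QNN:  # assumed branch; not shown in the diff context
        return "so"
    elif target_runtime == TargetRuntime.TFLITE:
        return "tflite"
    elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}:
        # Updated in this change: precompiled ONNX Runtime artifacts are also .onnx files.
        return "onnx"
    raise ValueError(f"Unsupported target runtime: {target_runtime}")
```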
@@ -12,12 +12,12 @@ ) # isort: on - import torch from aimet_torch.cross_layer_equalization import equalize_model from aimet_torch.model_preparer import prepare_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim +from qai_hub_models.models._shared.super_resolution.model import DEFAULT_SCALE_FACTOR from qai_hub_models.models.quicksrnetsmall.model import QuickSRNetSmall from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset @@ -26,7 +26,6 @@ MODEL_ASSET_VERSION = 4 DEFAULT_ENCODINGS = "quicksrnetsmall_quantized_encodings.json" -SCALING_FACTOR = 4 class QuickSRNetSmallQuantizable(AIMETQuantizableMixin, QuickSRNetSmall): @@ -37,13 +36,16 @@ class QuickSRNetSmallQuantizable(AIMETQuantizableMixin, QuickSRNetSmall): def __init__( self, quicksrnet_model: QuantizationSimModel, + scale_factor: int, ) -> None: - QuickSRNetSmall.__init__(self, quicksrnet_model.model) + QuickSRNetSmall.__init__(self, quicksrnet_model.model, scale_factor) AIMETQuantizableMixin.__init__(self, quicksrnet_model) @classmethod def from_pretrained( - cls, aimet_encodings: str | None = "DEFAULT" + cls, + aimet_encodings: str | None = "DEFAULT", + scale_factor: int = DEFAULT_SCALE_FACTOR, ) -> "QuickSRNetSmallQuantizable": """ Parameters: @@ -53,7 +55,7 @@ def from_pretrained( else: Interprets as a filepath and loads the encodings stored there. """ # Load Model - fp16_model = QuickSRNetSmall.from_pretrained() + fp16_model = QuickSRNetSmall.from_pretrained(scale_factor) input_shape = cls.get_input_spec()["image"][0] model = prepare_model(fp16_model) equalize_model(model, input_shape) @@ -76,4 +78,4 @@ def from_pretrained( sim.model.eval() - return cls(sim) + return cls(sim, scale_factor) diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml b/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml index c846aab6..babfa73c 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml +++ b/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: QuickSRNetSmall-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 950.0 - throughput: 1052.6315789473683 + inference_time: 974.0 + throughput: 1026.694045174538 estimated_peak_memory_range: - min: 65536 - max: 1644392 + min: 12288 + max: 2677152 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jqp4w48lg + job_id: jvgd7z3zg job_status: Passed torchscript_onnx_qnn: - inference_time: 668.0 - throughput: 1497.005988023952 + inference_time: 671.0 + throughput: 1490.312965722802 estimated_peak_memory_range: - min: 16384 - max: 2183568 + min: 65536 + max: 3287624 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 8 - job_id: jep2mdjm5 + job_id: jegnr7nq5 job_status: Passed torchscript_onnx_ort: - inference_time: 1092.0 - throughput: 915.7509157509157 + inference_time: 691.0 + throughput: 1447.178002894356 estimated_peak_memory_range: - min: 212992 - max: 2472616 + min: 53248 + max: 3206304 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 14 + layers_on_npu: 12 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 14 - job_id: jn5q21wm5 + total_layers: 12 + job_id: j2p0evjn5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: 
Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.196900Z' + timestamp: '2024-06-11T11:58:18Z' - torchscript_onnx_tflite: - inference_time: 780.0 - throughput: 1282.051282051282 + inference_time: 793.0 + throughput: 1261.034047919294 estimated_peak_memory_range: - min: 12288 - max: 18203808 + min: 16384 + max: 19014000 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: j0px1rm9g + job_id: jz57v7495 job_status: Passed torchscript_onnx_qnn: - inference_time: 450.0 - throughput: 2222.222222222222 + inference_time: 458.0 + throughput: 2183.406113537118 estimated_peak_memory_range: - min: 65536 - max: 13513920 + min: 81920 + max: 14588544 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 8 - job_id: jqpyd2n4p + job_id: jopr1n07g job_status: Passed torchscript_onnx_ort: - inference_time: 845.0 - throughput: 1183.4319526627219 + inference_time: 553.0 + throughput: 1808.3182640144666 estimated_peak_memory_range: - min: 12288 - max: 13152976 + min: 65536 + max: 10727936 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 14 + layers_on_npu: 12 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 14 - job_id: j1glk87lp + total_layers: 12 + job_id: j1p8w4xop job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.196939Z' + timestamp: '2024-06-11T11:58:19Z' - torchscript_onnx_tflite: - inference_time: 987.0 - throughput: 1013.1712259371834 + inference_time: 960.0 + throughput: 1041.6666666666667 estimated_peak_memory_range: - min: 24576 - max: 1367976 + min: 20480 + max: 1503368 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jo5mzk4qp + job_id: jqp4j911p job_status: Passed torchscript_onnx_qnn: inference_time: 672.0 throughput: 1488.095238095238 estimated_peak_memory_range: - min: 28672 - max: 3499584 + min: 12288 + max: 47001808 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 8 - job_id: j1p87r885 + job_id: jqpyv7xlp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.196966Z' + timestamp: '2024-06-11T11:58:17Z' - torchscript_onnx_tflite: - inference_time: 2693.0 - throughput: 371.3330857779428 + inference_time: 1754.0 + throughput: 570.1254275940707 estimated_peak_memory_range: min: 12288 - max: 13952064 + max: 13580528 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jegneqxmg - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1247.0 - throughput: 801.924619085806 - estimated_peak_memory_range: - min: 12288 - max: 13441264 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 8 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 8 - job_id: jogky0dop + job_id: j0pxed4l5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:42.196990Z' + timestamp: '2024-06-11T11:58:12Z' - 
torchscript_onnx_tflite: - inference_time: 6002.0 - throughput: 166.61112962345885 + inference_time: 5837.0 + throughput: 171.32088401576152 estimated_peak_memory_range: - min: 3334144 - max: 11969392 + min: 249856 + max: 7133040 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jopryd9eg + job_id: jo5mvdm95 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:42.197005Z' + timestamp: '2024-06-11T11:58:13Z' - torchscript_onnx_qnn: - inference_time: 740.0 - throughput: 1351.3513513513512 + inference_time: 718.0 + throughput: 1392.757660167131 estimated_peak_memory_range: - min: 49152 - max: 49152 + min: 1077248 + max: 1077248 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 8 - job_id: j2p0r9kep + job_id: jep23vwqg job_status: Passed torchscript_onnx_ort: - inference_time: 1038.0 - throughput: 963.3911368015414 + inference_time: 698.0 + throughput: 1432.6647564469913 estimated_peak_memory_range: - min: 8916992 - max: 8916992 + min: 7000064 + max: 7000064 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 14 + layers_on_npu: 12 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 14 - job_id: jw561mv7p + total_layers: 12 + job_id: jogkr94n5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.197032Z' + timestamp: '2024-06-11T11:58:20Z' diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/test.py b/qai_hub_models/models/quicksrnetsmall_quantized/test.py index b23accfd..08fe4cf3 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/test.py +++ b/qai_hub_models/models/quicksrnetsmall_quantized/test.py @@ -2,30 +2,23 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -import os -import zipfile import numpy as np import pytest import torch from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models.quicksrnetsmall_quantized.demo import IMAGE_ADDRESS +from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS +from qai_hub_models.models.quicksrnetsmall.model import MODEL_ASSET_VERSION, MODEL_ID from qai_hub_models.models.quicksrnetsmall_quantized.demo import main as demo_main from qai_hub_models.models.quicksrnetsmall_quantized.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, QuickSRNetSmallQuantizable, ) -from qai_hub_models.utils.asset_loaders import ( - CachedWebModelAsset, - load_image, - qaihm_temp_dir, -) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image from qai_hub_models.utils.testing import assert_most_close, skip_clone_repo_check OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetsmall_quantized_output.png" + MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetsmall_demo_output.png" ) @@ -67,24 +60,6 @@ def test_trace(): ) -@pytest.mark.skip("https://github.com/tetraai/tetracode/issues/9606") -@skip_clone_repo_check -def test_aimet_export(): - model = QuickSRNetSmallQuantizable.from_pretrained() - name = model.__class__.__name__ - with qaihm_temp_dir() as tmpdir: - output_zip = model.convert_to_onnx_and_aimet_encodings( - tmpdir, - ) - assert os.path.exists(output_zip) - with zipfile.ZipFile(output_zip, "r") as zip: - assert zip.namelist() == [ - f"{name}.aimet/", - f"{name}.aimet/{name}.onnx", - f"{name}.aimet/{name}.encodings", - ] - - @skip_clone_repo_check def test_demo(): demo_main(is_test=True) diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/README.md b/qai_hub_models/models/real_esrgan_general_x4v3/README.md index c25f5606..11cbbee5 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/README.md +++ b/qai_hub_models/models/real_esrgan_general_x4v3/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Real-ESRGAN-General-x4v3 can be found [here](https://github.com/xinntao/Real-ESRGAN/blob/master/LICENSE). 
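The test modules in this change (see the hunk above and the matching ones for the other QuickSRNet variants) stop defining per-model demo images: the input now comes from the shared super-resolution demo, and the reference output is versioned under the corresponding floating-point model's asset-store entry. A small sketch of how those two assets are resolved, using only the imports and calls visible in the hunks:

```python
from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS
from qai_hub_models.models.quicksrnetsmall.model import MODEL_ASSET_VERSION, MODEL_ID
from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image

# Reference output for the quantized test, stored under the base (float) model's
# asset namespace rather than the *_quantized module's own MODEL_ID.
OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store(
    MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetsmall_demo_output.png"
)

# Both assets are fetched from the asset store (and cached locally) on first use.
input_image = load_image(IMAGE_ADDRESS)
expected_image = load_image(OUTPUT_IMAGE_ADDRESS)
```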
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data](https://arxiv.org/abs/2107.10833) diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/export.py b/qai_hub_models/models/real_esrgan_general_x4v3/export.py index 588420bb..4aa9e1fd 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/export.py +++ b/qai_hub_models/models/real_esrgan_general_x4v3/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml b/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml index 5a0a2c77..6f9afa6c 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml +++ b/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml @@ -36,11 +36,11 @@ models: - name: Real-ESRGAN-General-x4v3 performance_metrics: - torchscript_onnx_tflite: - inference_time: 7495.0 - throughput: 133.422281521014 + inference_time: 7261.0 + throughput: 137.72207684891887 estimated_peak_memory_range: - min: 15757312 - max: 17670976 + min: 17604608 + max: 25105264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jwgovwmd5 + job_id: jmg994wwg job_status: Passed torchscript_onnx_qnn: - inference_time: 6301.0 - throughput: 158.70496746548167 + inference_time: 6295.0 + throughput: 158.85623510722795 estimated_peak_memory_range: - min: 20480 - max: 113174448 + min: 221184 + max: 4921640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jlpevx205 + job_id: jz57vdxv5 job_status: Passed torchscript_onnx_ort: - inference_time: 6839.0 - throughput: 146.22020763269484 + inference_time: 6938.0 + throughput: 144.13375612568464 estimated_peak_memory_range: min: 6332416 - max: 18024640 + max: 55155560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 74 - job_id: jvgdv40lg + job_id: jegnre3k5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.240474Z' + timestamp: '2024-06-08T22:59:37Z' - torchscript_onnx_tflite: - inference_time: 5502.0 - throughput: 181.75209014903672 + inference_time: 5588.0 + throughput: 178.9549033643522 estimated_peak_memory_range: - min: 16384 - max: 54712192 + min: 20480 + max: 56093568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: j1pvwm4mg + job_id: jnp1q8e8g job_status: Passed torchscript_onnx_qnn: - inference_time: 4595.0 - throughput: 217.6278563656148 + inference_time: 4604.0 + throughput: 217.2024326672459 estimated_peak_memory_range: - min: 0 - max: 34446896 + min: 208896 + max: 37726496 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jygz7yw6p + job_id: jqp4jwv8p job_status: Passed torchscript_onnx_ort: - inference_time: 5168.0 - throughput: 193.4984520123839 + inference_time: 5181.0 + throughput: 193.01293186643505 estimated_peak_memory_range: min: 2310144 - max: 34782944 + max: 36289552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 74 - job_id: jz5w9z86p + job_id: jopr1ye0g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.240532Z' + timestamp: '2024-06-08T22:59:38Z' - torchscript_onnx_tflite: - inference_time: 7428.0 - throughput: 134.62574044157242 + inference_time: 7376.0 + throughput: 135.57483731019522 estimated_peak_memory_range: - min: 6606848 - max: 15301872 + min: 6377472 + max: 7904672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: j7gjly18p + job_id: jvgd7vorg job_status: Passed torchscript_onnx_qnn: - inference_time: 6290.0 - throughput: 158.9825119236884 + inference_time: 6271.0 + throughput: 159.46420028703557 estimated_peak_memory_range: - min: 20480 - max: 8513144 + min: 131072 + max: 5213032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jnp1813lg + job_id: jo5mvz3d5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.240569Z' + timestamp: '2024-06-08T22:59:36Z' - torchscript_onnx_qnn: - inference_time: 9188.0 - throughput: 108.837614279495 + inference_time: 8670.0 + throughput: 115.34025374855824 estimated_peak_memory_range: - min: 229376 - max: 229376 + min: 208896 + max: 208896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jz5w9zxjp + job_id: j0pxe1y35 job_status: Passed torchscript_onnx_ort: - inference_time: 7242.0 - throughput: 138.08340237503452 + inference_time: 7041.0 + throughput: 142.02528049992898 estimated_peak_memory_range: - min: 8654848 - max: 8654848 + min: 8646656 + max: 8646656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 74 - job_id: jmg942kl5 + job_id: jep23mlrg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.240608Z' + timestamp: '2024-06-08T22:59:39Z' diff --git a/qai_hub_models/models/real_esrgan_x4plus/README.md b/qai_hub_models/models/real_esrgan_x4plus/README.md index 89551a63..3c6db231 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/README.md +++ b/qai_hub_models/models/real_esrgan_x4plus/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Real-ESRGAN-x4plus can be found [here](https://github.com/xinntao/Real-ESRGAN/blob/master/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data](https://arxiv.org/abs/2107.10833) diff --git a/qai_hub_models/models/real_esrgan_x4plus/export.py b/qai_hub_models/models/real_esrgan_x4plus/export.py index 33718c26..ab0454e1 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/export.py +++ b/qai_hub_models/models/real_esrgan_x4plus/export.py @@ -172,7 +172,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/real_esrgan_x4plus/perf.yaml b/qai_hub_models/models/real_esrgan_x4plus/perf.yaml index 4a878b9f..c2ee5598 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/perf.yaml +++ b/qai_hub_models/models/real_esrgan_x4plus/perf.yaml @@ -36,11 +36,11 @@ models: - name: Real-ESRGAN-x4plus performance_metrics: - torchscript_onnx_tflite: - inference_time: 68352.0 - throughput: 14.630149812734082 + inference_time: 71761.0 + throughput: 13.935145831301124 estimated_peak_memory_range: - min: 3293184 - max: 6645752 + min: 4210688 + max: 13102152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jvgdv48eg + job_id: j2p0erl95 job_status: Passed torchscript_onnx_qnn: - inference_time: 65610.0 - throughput: 15.241579027587258 + inference_time: 70398.0 + throughput: 14.204949004233075 estimated_peak_memory_range: - min: 118784 - max: 55504888 + min: 12288 + max: 106397920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1031 - job_id: j0px1r31g + job_id: jn5q923np job_status: Passed torchscript_onnx_ort: - inference_time: 70866.0 - throughput: 14.111139333389778 + inference_time: 65953.0 + throughput: 15.162312555911027 estimated_peak_memory_range: - min: 0 - max: 139023520 + min: 6344704 + max: 155593192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: jep2md445 + job_id: jwgoev3qp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.274898Z' + timestamp: '2024-06-08T23:00:20Z' - torchscript_onnx_tflite: - inference_time: 53158.0 - throughput: 18.8118439369427 + inference_time: 52163.0 + throughput: 19.170676533174856 estimated_peak_memory_range: - min: 3264512 - max: 586204928 + min: 77824 + max: 586842272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jz57dnkl5 + job_id: j1p8w7zkp job_status: Passed torchscript_onnx_qnn: - inference_time: 50734.0 - throughput: 19.710647691883157 + inference_time: 50801.0 + throughput: 19.684651876931557 estimated_peak_memory_range: - min: 69632 - max: 262891120 + min: 102400 + max: 264449376 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1031 - job_id: jo5mzkowp + job_id: j1glek3jp job_status: Passed torchscript_onnx_ort: - inference_time: 52204.0 - throughput: 19.155620258983987 + inference_time: 51691.0 + throughput: 19.34572749608249 estimated_peak_memory_range: - min: 6447104 - max: 191489024 + min: 6029312 + max: 190175536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: jqpyd2q7p + job_id: j1pvzwwkg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.275248Z' + timestamp: '2024-06-08T23:00:21Z' - torchscript_onnx_tflite: - inference_time: 65521.0 - throughput: 15.262282321698386 + inference_time: 67995.0 + throughput: 14.706963747334363 estimated_peak_memory_range: - min: 3284992 - max: 6005408 + min: 1552384 + max: 4034000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jqp4w4mvg + job_id: jogkry3w5 job_status: Passed torchscript_onnx_qnn: - inference_time: 66374.0 - throughput: 15.066140356163558 + inference_time: 69414.0 + throughput: 14.406315728815512 estimated_peak_memory_range: - min: 131072 - max: 55641936 + min: 0 + max: 56605216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1031 - job_id: joprydo9g + job_id: j1p3qme35 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.275481Z' + timestamp: '2024-06-08T23:00:19Z' - torchscript_onnx_qnn: - inference_time: 73922.0 - throughput: 13.527772516977354 + inference_time: 73906.0 + throughput: 13.530701160934159 estimated_peak_memory_range: min: 212992 max: 212992 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: jegneqorg + job_id: jw56q1n6g job_status: Passed torchscript_onnx_ort: - inference_time: 65841.0 - throughput: 15.18810467641743 + inference_time: 65787.0 + throughput: 15.20057154148996 estimated_peak_memory_range: - min: 1998848 - max: 1998848 + min: 233472 + max: 233472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: j2p0r9d6p + job_id: j7gjkllv5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.275721Z' + timestamp: '2024-06-08T23:00:22Z' diff --git a/qai_hub_models/models/regnet/README.md b/qai_hub_models/models/regnet/README.md index 96c82923..3caff192 100644 --- a/qai_hub_models/models/regnet/README.md +++ b/qai_hub_models/models/regnet/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of RegNet can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678) diff --git a/qai_hub_models/models/regnet/evaluate.py b/qai_hub_models/models/regnet/evaluate.py new file mode 100644 index 00000000..7f135792 --- /dev/null +++ b/qai_hub_models/models/regnet/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.regnet import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/regnet/export.py b/qai_hub_models/models/regnet/export.py index ab9a56b7..48731313 100644 --- a/qai_hub_models/models/regnet/export.py +++ b/qai_hub_models/models/regnet/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/regnet/info.yaml b/qai_hub_models/models/regnet/info.yaml index 863f909d..19bc7cde 100644 --- a/qai_hub_models/models/regnet/info.yaml +++ b/qai_hub_models/models/regnet/info.yaml @@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/regnet/perf.yaml b/qai_hub_models/models/regnet/perf.yaml index 77a8aad9..6dbdb072 100644 --- a/qai_hub_models/models/regnet/perf.yaml +++ b/qai_hub_models/models/regnet/perf.yaml @@ -36,11 +36,11 @@ models: - name: RegNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 2323.0 - throughput: 430.4778303917348 + inference_time: 2344.0 + throughput: 426.6211604095563 estimated_peak_memory_range: - min: 24576 - max: 2684712 + min: 40960 + max: 2564000 primary_compute_unit: NPU precision: 
fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jogky0o2p + job_id: jygzv77op job_status: Passed torchscript_onnx_qnn: - inference_time: 2114.0 - throughput: 473.0368968779565 + inference_time: 2105.0 + throughput: 475.05938242280286 estimated_peak_memory_range: min: 16384 - max: 76878488 + max: 66214464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jw561mr0p + job_id: jnp1q888g job_status: Passed torchscript_onnx_ort: - inference_time: 2340.0 - throughput: 427.35042735042737 + inference_time: 2313.0 + throughput: 432.33895373973195 estimated_peak_memory_range: - min: 12288 - max: 74190896 + min: 16384 + max: 109504192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 190 - job_id: j7gjlyoxp + job_id: jnp1q887g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.309964Z' + timestamp: '2024-06-08T23:00:51Z' - torchscript_onnx_tflite: - inference_time: 1626.0 - throughput: 615.0061500615006 + inference_time: 1623.0 + throughput: 616.1429451632779 estimated_peak_memory_range: - min: 12288 - max: 133396720 + min: 16384 + max: 137911392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jn5q21z45 + job_id: jz5wm993g job_status: Passed torchscript_onnx_qnn: - inference_time: 1479.0 - throughput: 676.132521974307 + inference_time: 1494.0 + throughput: 669.3440428380187 estimated_peak_memory_range: min: 618496 - max: 74924816 + max: 75619760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: j1p3m7xlg + job_id: jvgd7vvrg job_status: Passed torchscript_onnx_ort: - inference_time: 1679.0 - throughput: 595.5926146515783 + inference_time: 1637.0 + throughput: 610.8735491753207 estimated_peak_memory_range: min: 0 - max: 39651088 + max: 37581584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 190 - job_id: jlpevx815 + job_id: jvgd7vvzg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.310050Z' + timestamp: '2024-06-08T23:00:52Z' - torchscript_onnx_tflite: - inference_time: 2328.0 - throughput: 429.553264604811 + inference_time: 2318.0 + throughput: 431.40638481449525 estimated_peak_memory_range: - min: 24576 - max: 2351904 + min: 16384 + max: 2479152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: j1glk8o8p + job_id: jmg9944wg job_status: Passed torchscript_onnx_qnn: - inference_time: 2110.0 - throughput: 473.93364928909955 + inference_time: 2102.0 + throughput: 475.7373929590866 estimated_peak_memory_range: - min: 16384 - max: 65823536 + min: 12288 + max: 14056768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: j1pvwmejg + job_id: jmg99448g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ 
models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.310103Z' + timestamp: '2024-06-08T23:00:50Z' - torchscript_onnx_qnn: - inference_time: 2509.0 - throughput: 398.5651654045436 + inference_time: 2475.0 + throughput: 404.04040404040404 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jwgovwox5 + job_id: jz5wm99mg job_status: Passed torchscript_onnx_ort: - inference_time: 2177.0 - throughput: 459.34772622875516 + inference_time: 2215.0 + throughput: 451.46726862302484 estimated_peak_memory_range: - min: 37945344 - max: 37945344 + min: 651264 + max: 651264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 190 - job_id: jygz7y8kp + job_id: jz57vdd95 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.310167Z' + timestamp: '2024-06-08T23:00:53Z' diff --git a/qai_hub_models/models/resnet101/README.md b/qai_hub_models/models/resnet101/README.md index 218a6131..3557c576 100644 --- a/qai_hub_models/models/resnet101/README.md +++ b/qai_hub_models/models/resnet101/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNet101 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) diff --git a/qai_hub_models/models/resnet101/evaluate.py b/qai_hub_models/models/resnet101/evaluate.py new file mode 100644 index 00000000..9f91113a --- /dev/null +++ b/qai_hub_models/models/resnet101/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.resnet101 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnet101/export.py b/qai_hub_models/models/resnet101/export.py index 82072425..337029e4 100644 --- a/qai_hub_models/models/resnet101/export.py +++ b/qai_hub_models/models/resnet101/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnet101/info.yaml b/qai_hub_models/models/resnet101/info.yaml index f1410fb4..abbc258b 100644 --- a/qai_hub_models/models/resnet101/info.yaml +++ b/qai_hub_models/models/resnet101/info.yaml @@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/resnet101/perf.yaml b/qai_hub_models/models/resnet101/perf.yaml index 1fbddfb5..c30b049f 100644 --- a/qai_hub_models/models/resnet101/perf.yaml +++ b/qai_hub_models/models/resnet101/perf.yaml @@ -36,11 +36,11 @@ models: - name: ResNet101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 3398.0 - throughput: 294.2907592701589 + inference_time: 3383.0 + throughput: 295.5956251847473 estimated_peak_memory_range: min: 16384 - max: 2088632 + max: 2493664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jn5q21m45 + job_id: j1pvzw1rg job_status: Passed torchscript_onnx_qnn: - inference_time: 3467.0 - throughput: 288.43380444188057 + inference_time: 3448.0 + throughput: 290.0232018561485 estimated_peak_memory_range: - min: 618496 - max: 173749104 + min: 16384 + max: 173843416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j1p3m7wlg + job_id: jygzv7xxp job_status: Passed torchscript_onnx_ort: - inference_time: 3578.0 - throughput: 279.4857462269424 + inference_time: 3614.0 + throughput: 276.70171555063644 estimated_peak_memory_range: - min: 16384 - max: 233232480 + min: 45056 + max: 355647168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 247 - job_id: jlpevxl15 + job_id: jvgd7vrzg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.388058Z' + timestamp: '2024-06-08T23:02:27Z' - torchscript_onnx_tflite: - inference_time: 2447.0 - throughput: 408.6636697997548 + inference_time: 2440.0 + throughput: 409.8360655737705 estimated_peak_memory_range: - min: 12288 - max: 107056384 + min: 16384 + max: 109471344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: j1glk818p + job_id: j7gjkl0e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2520.0 - throughput: 396.8253968253968 + inference_time: 2514.0 + throughput: 397.77247414478916 estimated_peak_memory_range: min: 618496 - max: 83774768 + max: 81083536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jwgovw4x5 + job_id: jz5wm9dmg job_status: Passed torchscript_onnx_ort: - inference_time: 2587.0 - throughput: 386.5481252415926 + inference_time: 2575.0 + throughput: 388.3495145631068 estimated_peak_memory_range: min: 618496 - max: 48055616 + max: 46866960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jygz7y4kp + job_id: jz57vdj95 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.388157Z' + timestamp: '2024-06-08T23:02:28Z' - torchscript_onnx_tflite: - inference_time: 3376.0 - throughput: 296.2085308056872 + inference_time: 3388.0 + throughput: 295.159386068477 estimated_peak_memory_range: - min: 45056 - max: 2154704 + min: 28672 + max: 1888064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jw561md0p + job_id: jlpe4vrv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3465.0 - throughput: 288.6002886002886 + inference_time: 3458.0 + throughput: 289.1844997108155 estimated_peak_memory_range: - min: 618496 - max: 163344000 + min: 626688 + max: 163514888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j7gjlywxp + job_id: jnp1q8d7g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.388219Z' + timestamp: '2024-06-08T23:02:26Z' - torchscript_onnx_qnn: - inference_time: 4026.0 - throughput: 248.38549428713364 + inference_time: 4024.0 + throughput: 248.5089463220676 estimated_peak_memory_range: - min: 602112 - max: 602112 + min: 1011712 + max: 1011712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j1pvwm9jg + job_id: jmg99438g job_status: Passed torchscript_onnx_ort: - inference_time: 3496.0 - throughput: 286.0411899313501 + inference_time: 3536.0 + throughput: 282.80542986425337 estimated_peak_memory_range: - min: 43966464 - max: 43966464 + min: 43122688 + max: 43122688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jz5w9z46p + job_id: jqp4jwx1p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.388290Z' + timestamp: '2024-06-08T23:02:29Z' diff --git a/qai_hub_models/models/resnet101_quantized/README.md b/qai_hub_models/models/resnet101_quantized/README.md index 0d1ec373..4c46a553 100644 --- a/qai_hub_models/models/resnet101_quantized/README.md +++ b/qai_hub_models/models/resnet101_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNet101Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) diff --git a/qai_hub_models/models/resnet101_quantized/evaluate.py b/qai_hub_models/models/resnet101_quantized/evaluate.py new file mode 100644 index 00000000..fde921e3 --- /dev/null +++ b/qai_hub_models/models/resnet101_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.resnet101_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnet101_quantized/export.py b/qai_hub_models/models/resnet101_quantized/export.py index feb84bd3..109a3790 100644 --- a/qai_hub_models/models/resnet101_quantized/export.py +++ b/qai_hub_models/models/resnet101_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnet101_quantized/info.yaml b/qai_hub_models/models/resnet101_quantized/info.yaml index e25f53d7..2da5c904 100644 --- a/qai_hub_models/models/resnet101_quantized/info.yaml +++ b/qai_hub_models/models/resnet101_quantized/info.yaml @@ -41,3 +41,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/resnet101_quantized/perf.yaml b/qai_hub_models/models/resnet101_quantized/perf.yaml index 6dea8107..9bf5bafe 100644 --- a/qai_hub_models/models/resnet101_quantized/perf.yaml +++ b/qai_hub_models/models/resnet101_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: ResNet101Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1181.0 - throughput: 846.740050804403 + inference_time: 1188.0 + throughput: 841.7508417508418 estimated_peak_memory_range: - min: 16384 - max: 1842712 + min: 0 + max: 1614400 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jvgdv42eg + job_id: jo5mvzw95 job_status: Passed torchscript_onnx_qnn: - inference_time: 1388.0 - throughput: 720.4610951008646 + inference_time: 1377.0 + throughput: 726.2164124909223 estimated_peak_memory_range: - min: 16384 - max: 110788816 + min: 12288 + max: 58349752 primary_compute_unit: NPU precision: int8 layer_info: @@ 
-69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: joprydw9g + job_id: j2p0er1n5 job_status: Passed torchscript_onnx_ort: - inference_time: 1548.0 - throughput: 645.9948320413437 + inference_time: 1486.0 + throughput: 672.9475100942127 estimated_peak_memory_range: min: 12288 - max: 152332600 + max: 87121872 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 154 + layers_on_npu: 151 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 154 - job_id: jogky082p + total_layers: 151 + job_id: j1glekemp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.422642Z' + timestamp: '2024-06-08T23:04:23Z' - torchscript_onnx_tflite: - inference_time: 936.0 - throughput: 1068.3760683760684 + inference_time: 927.0 + throughput: 1078.7486515641856 estimated_peak_memory_range: min: 12288 - max: 92834304 + max: 93411600 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jz57dn9l5 + job_id: jegnre9q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1060.0 - throughput: 943.3962264150944 + inference_time: 1078.0 + throughput: 927.643784786642 estimated_peak_memory_range: - min: 167936 - max: 65745760 + min: 163840 + max: 66249856 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jep2mde45 + job_id: j1p8w73op job_status: Passed torchscript_onnx_ort: - inference_time: 1252.0 - throughput: 798.7220447284345 + inference_time: 1162.0 + throughput: 860.5851979345955 estimated_peak_memory_range: min: 0 - max: 43619040 + max: 47460512 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 154 + layers_on_npu: 151 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 154 - job_id: jn5q21v45 + total_layers: 151 + job_id: jw56q1qyg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.422723Z' + timestamp: '2024-06-08T23:04:24Z' - torchscript_onnx_tflite: - inference_time: 1182.0 - throughput: 846.0236886632825 + inference_time: 1171.0 + throughput: 853.9709649871904 estimated_peak_memory_range: - min: 32768 - max: 1724248 + min: 12288 + max: 1692848 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jqp4w43vg + job_id: jopr1y47g job_status: Passed torchscript_onnx_qnn: - inference_time: 1371.0 - throughput: 729.3946024799417 + inference_time: 1379.0 + throughput: 725.1631617113851 estimated_peak_memory_range: min: 12288 - max: 113245424 + max: 47947408 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j2p0r966p + job_id: jn5q927op job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.422775Z' + timestamp: '2024-06-08T23:04:22Z' - torchscript_onnx_tflite: - inference_time: 4715.0 - throughput: 212.08907741251326 + inference_time: 4690.0 + throughput: 213.21961620469082 estimated_peak_memory_range: - min: 49152 - max: 27288944 + min: 12288 + max: 30183472 
primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: j0px1rx1g - job_status: Passed - torchscript_onnx_qnn: - inference_time: 5203.0 - throughput: 192.19680953296177 - estimated_peak_memory_range: - min: 163840 - max: 60211840 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 146 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 146 - job_id: j1p87r1x5 + job_id: jep23m7qg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:42.422828Z' + timestamp: '2024-06-08T23:04:17Z' - torchscript_onnx_tflite: - inference_time: 17085.0 - throughput: 58.5308750365818 + inference_time: 17058.0 + throughput: 58.62351975612616 estimated_peak_memory_range: - min: 12288 - max: 2437568 + min: 40960 + max: 1956688 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jegneqkrg + job_id: jqpyvd4lp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:42.422856Z' + timestamp: '2024-06-08T23:04:18Z' - torchscript_onnx_qnn: - inference_time: 1424.0 - throughput: 702.2471910112359 + inference_time: 1381.0 + throughput: 724.112961622013 estimated_peak_memory_range: - min: 495616 - max: 495616 + min: 270336 + max: 270336 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jqpyd2m7p + job_id: jogkryln5 job_status: Passed torchscript_onnx_ort: - inference_time: 1429.0 - throughput: 699.7900629811056 + inference_time: 1313.0 + throughput: 761.6146230007616 estimated_peak_memory_range: - min: 50556928 - max: 50556928 + min: 24576 + max: 24576 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 154 + layers_on_npu: 151 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 154 - job_id: j1glk8l8p + total_layers: 151 + job_id: j1p3qmqn5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.422910Z' + timestamp: '2024-06-08T23:04:25Z' diff --git a/qai_hub_models/models/resnet18/README.md b/qai_hub_models/models/resnet18/README.md index de48498f..2b9ced95 100644 --- a/qai_hub_models/models/resnet18/README.md +++ b/qai_hub_models/models/resnet18/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNet18 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) diff --git a/qai_hub_models/models/resnet18/evaluate.py b/qai_hub_models/models/resnet18/evaluate.py new file mode 100644 index 00000000..129a0a90 --- /dev/null +++ b/qai_hub_models/models/resnet18/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.resnet18 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnet18/export.py b/qai_hub_models/models/resnet18/export.py index 0bcf2350..b4ca9324 100644 --- a/qai_hub_models/models/resnet18/export.py +++ b/qai_hub_models/models/resnet18/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnet18/info.yaml b/qai_hub_models/models/resnet18/info.yaml index 8b06179f..9ddcdde0 100644 --- a/qai_hub_models/models/resnet18/info.yaml +++ b/qai_hub_models/models/resnet18/info.yaml @@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/resnet18/perf.yaml b/qai_hub_models/models/resnet18/perf.yaml index 7e1fee47..ff6f698a 100644 --- a/qai_hub_models/models/resnet18/perf.yaml +++ b/qai_hub_models/models/resnet18/perf.yaml @@ -36,11 +36,11 @@ models: - name: ResNet18 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1408.0 - throughput: 710.2272727272727 + inference_time: 1416.0 + throughput: 706.2146892655368 estimated_peak_memory_range: - min: 57344 - max: 1345984 + min: 61440 + 
max: 1999640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: j1p3m76lg + job_id: j1pvzwzrg job_status: Passed torchscript_onnx_qnn: - inference_time: 1468.0 - throughput: 681.1989100817439 + inference_time: 1446.0 + throughput: 691.5629322268327 estimated_peak_memory_range: - min: 135168 - max: 83377024 + min: 86016 + max: 83516488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: j7gjlyqxp + job_id: jygzv7vxp job_status: Passed torchscript_onnx_ort: - inference_time: 1383.0 - throughput: 723.0657989877079 + inference_time: 1350.0 + throughput: 740.7407407407408 estimated_peak_memory_range: - min: 12288 - max: 101086224 + min: 24576 + max: 88328320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jnp181j2g + job_id: jvgd7v7zg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.466422Z' + timestamp: '2024-06-08T23:04:50Z' - torchscript_onnx_tflite: - inference_time: 980.0 - throughput: 1020.4081632653061 + inference_time: 989.0 + throughput: 1011.1223458038422 estimated_peak_memory_range: min: 12288 - max: 24571312 + max: 25458704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jwgovw8x5 + job_id: j7gjklke5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1011.0 - throughput: 989.1196834817013 + inference_time: 1017.0 + throughput: 983.284169124877 estimated_peak_memory_range: min: 618496 - max: 27709520 + max: 29899792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jlpevxy15 + job_id: jz5wm9mmg job_status: Passed torchscript_onnx_ort: - inference_time: 982.0 - throughput: 1018.3299389002036 + inference_time: 978.0 + throughput: 1022.4948875255624 estimated_peak_memory_range: min: 0 - max: 17788432 + max: 16899936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jvgdv43eg + job_id: jz57vdv95 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.466474Z' + timestamp: '2024-06-08T23:04:51Z' - torchscript_onnx_tflite: - inference_time: 1394.0 - throughput: 717.3601147776184 + inference_time: 1412.0 + throughput: 708.2152974504249 estimated_peak_memory_range: min: 28672 - max: 1904504 + max: 154269408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: j1pvwm7jg + job_id: jlpe4v4v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1469.0 - throughput: 680.7351940095303 + inference_time: 1473.0 + throughput: 678.8866259334691 estimated_peak_memory_range: min: 16384 - max: 83829944 + max: 72911032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jmg942ml5 + job_id: jnp1q8q7g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 
+178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.466506Z' + timestamp: '2024-06-08T23:04:49Z' - torchscript_onnx_qnn: - inference_time: 1561.0 - throughput: 640.6149903907751 + inference_time: 1575.0 + throughput: 634.9206349206349 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jz5w9z76p + job_id: jmg99498g job_status: Passed torchscript_onnx_ort: - inference_time: 1312.0 - throughput: 762.1951219512196 + inference_time: 1324.0 + throughput: 755.2870090634441 estimated_peak_memory_range: - min: 32391168 - max: 32391168 + min: 28278784 + max: 28278784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jz57dn4l5 + job_id: jqp4jwj1p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.466542Z' + timestamp: '2024-06-08T23:04:52Z' diff --git a/qai_hub_models/models/resnet18_quantized/README.md b/qai_hub_models/models/resnet18_quantized/README.md index 12cd6d77..266febea 100644 --- a/qai_hub_models/models/resnet18_quantized/README.md +++ b/qai_hub_models/models/resnet18_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNet18Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) diff --git a/qai_hub_models/models/resnet18_quantized/evaluate.py b/qai_hub_models/models/resnet18_quantized/evaluate.py new file mode 100644 index 00000000..d98aec44 --- /dev/null +++ b/qai_hub_models/models/resnet18_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.resnet18_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnet18_quantized/export.py b/qai_hub_models/models/resnet18_quantized/export.py index 22973f71..4ed971cc 100644 --- a/qai_hub_models/models/resnet18_quantized/export.py +++ b/qai_hub_models/models/resnet18_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnet18_quantized/info.yaml b/qai_hub_models/models/resnet18_quantized/info.yaml index f132fa42..c9d5ab07 100644 --- a/qai_hub_models/models/resnet18_quantized/info.yaml +++ b/qai_hub_models/models/resnet18_quantized/info.yaml @@ -41,3 +41,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/resnet18_quantized/perf.yaml b/qai_hub_models/models/resnet18_quantized/perf.yaml index 6c6466e8..a76fba6f 100644 --- a/qai_hub_models/models/resnet18_quantized/perf.yaml +++ b/qai_hub_models/models/resnet18_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: ResNet18Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 433.0 - throughput: 2309.4688221709007 + inference_time: 420.0 + throughput: 2380.9523809523807 estimated_peak_memory_range: min: 12288 - max: 1345424 + max: 1492608 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: jopryd09g + job_id: jo5mvzv95 job_status: Passed torchscript_onnx_qnn: - inference_time: 626.0 - throughput: 1597.444089456869 + inference_time: 639.0 + throughput: 1564.9452269170579 estimated_peak_memory_range: - min: 20480 - max: 169841240 + min: 24576 + max: 9441728 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 37 - job_id: j1glk8x8p + job_id: j2p0eren5 job_status: Passed torchscript_onnx_ort: - inference_time: 720.0 - throughput: 1388.888888888889 + inference_time: 641.0 + throughput: 1560.0624024960998 estimated_peak_memory_range: - min: 319488 - max: 21949752 + min: 12288 + max: 25595784 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 45 + layers_on_npu: 42 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 45 - job_id: j7gjly7xp + total_layers: 42 + job_id: j1glek7mp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.500897Z' + timestamp: '2024-06-08T23:05:34Z' - torchscript_onnx_tflite: - inference_time: 342.0 - throughput: 2923.9766081871344 + inference_time: 352.0 + throughput: 2840.909090909091 estimated_peak_memory_range: - min: 12288 - max: 23567296 + min: 16384 + max: 24707232 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: jqpyd2x7p + job_id: jegnrerq5 job_status: Passed torchscript_onnx_qnn: inference_time: 475.0 throughput: 2105.2631578947367 estimated_peak_memory_range: min: 163840 - max: 27991680 + max: 28038704 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 37 - job_id: jw561m70p + job_id: j1p8w7wop job_status: Passed torchscript_onnx_ort: - inference_time: 587.0 - throughput: 1703.5775127768313 + inference_time: 479.0 + throughput: 2087.6826722338205 estimated_peak_memory_range: - min: 618496 - max: 21565152 + min: 12288 + max: 20801936 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 45 + layers_on_npu: 42 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 45 - job_id: jlpevxz15 + total_layers: 42 + job_id: jw56q1vyg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.500945Z' + timestamp: '2024-06-08T23:05:35Z' - torchscript_onnx_tflite: - inference_time: 438.0 - throughput: 2283.10502283105 + inference_time: 420.0 + throughput: 2380.9523809523807 estimated_peak_memory_range: min: 12288 - max: 1304680 + max: 1230392 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: j2p0r9j6p + job_id: jopr1y17g job_status: Passed torchscript_onnx_qnn: - inference_time: 627.0 - throughput: 1594.896331738437 + inference_time: 632.0 + throughput: 1582.2784810126582 estimated_peak_memory_range: - min: 28672 - max: 29355864 + min: 16384 + max: 8848856 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 37 - job_id: jwgovwrx5 + job_id: jn5q929op job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.500976Z' + timestamp: '2024-06-08T23:05:33Z' - torchscript_onnx_tflite: - inference_time: 1473.0 - throughput: 678.8866259334691 + inference_time: 1426.0 + throughput: 701.2622720897616 estimated_peak_memory_range: min: 12288 - max: 15538320 + max: 15923968 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: j1p87rxx5 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1844.0 - throughput: 542.2993492407809 - estimated_peak_memory_range: - min: 12288 - max: 24215008 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 37 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 37 - job_id: j1pvwmdjg + job_id: jep23m3qg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:42.501006Z' + timestamp: '2024-06-08T23:05:28Z' - torchscript_onnx_tflite: - inference_time: 7020.0 - throughput: 142.45014245014244 + inference_time: 7066.0 + throughput: 141.52278516841213 estimated_peak_memory_range: - min: 77824 - max: 1892960 + min: 40960 + max: 6406016 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: jn5q21y45 + job_id: jqpyvdvlp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:42.501023Z' + timestamp: '2024-06-08T23:05:29Z' - torchscript_onnx_qnn: - inference_time: 836.0 - throughput: 1196.1722488038276 + inference_time: 742.0 + throughput: 1347.7088948787061 estimated_peak_memory_range: - min: 552960 - max: 552960 + min: 1617920 + max: 1617920 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 37 - job_id: j1p3m79lg + job_id: jogkryrn5 job_status: Passed torchscript_onnx_ort: - inference_time: 721.0 - throughput: 1386.9625520110958 + inference_time: 836.0 + throughput: 1196.1722488038276 estimated_peak_memory_range: - min: 10059776 - max: 10059776 + min: 3690496 + max: 3690496 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 45 + layers_on_npu: 42 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 45 - job_id: jygz7ymkp + total_layers: 42 + job_id: j1p3qm8n5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.501055Z' + timestamp: '2024-06-08T23:05:36Z' diff --git a/qai_hub_models/models/resnet50/README.md b/qai_hub_models/models/resnet50/README.md index 6abe8dfa..4ec67961 100644 --- a/qai_hub_models/models/resnet50/README.md +++ b/qai_hub_models/models/resnet50/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNet50 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) diff --git a/qai_hub_models/models/resnet50/evaluate.py b/qai_hub_models/models/resnet50/evaluate.py new file mode 100644 index 00000000..77f5289f --- /dev/null +++ b/qai_hub_models/models/resnet50/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.resnet50 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnet50/export.py b/qai_hub_models/models/resnet50/export.py index 01c180e3..688099b7 100644 --- a/qai_hub_models/models/resnet50/export.py +++ b/qai_hub_models/models/resnet50/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnet50/info.yaml b/qai_hub_models/models/resnet50/info.yaml index 7b52d418..4679dc8b 100644 --- a/qai_hub_models/models/resnet50/info.yaml +++ b/qai_hub_models/models/resnet50/info.yaml @@ -39,3 +39,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/resnet50/perf.yaml b/qai_hub_models/models/resnet50/perf.yaml index 8b5c0349..be4d4c11 100644 --- a/qai_hub_models/models/resnet50/perf.yaml +++ b/qai_hub_models/models/resnet50/perf.yaml @@ -36,11 +36,11 @@ models: - name: ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2279.0 - throughput: 438.7889425186485 + inference_time: 2269.0 + throughput: 440.72278536800354 estimated_peak_memory_range: min: 16384 - max: 2074480 + max: 2153680 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jmg942zl5 + job_id: j1pvzw4rg job_status: Passed torchscript_onnx_qnn: - inference_time: 2376.0 - throughput: 420.8754208754209 + inference_time: 2387.0 + throughput: 418.93590280687056 estimated_peak_memory_range: min: 622592 - max: 186037320 + max: 175232184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jz5w9zl3p + job_id: jygzv7wxp job_status: Passed torchscript_onnx_ort: - inference_time: 2361.0 - throughput: 423.5493434985176 + inference_time: 2366.0 + throughput: 422.654268808115 estimated_peak_memory_range: - min: 28672 - max: 219004312 + min: 12288 + max: 261165672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jz57dnev5 + job_id: jvgd7v0zg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.544373Z' + timestamp: '2024-06-08T23:06:03Z' - torchscript_onnx_tflite: - inference_time: 1634.0 - throughput: 611.9951040391677 + inference_time: 1615.0 + throughput: 619.1950464396285 estimated_peak_memory_range: - min: 16384 - max: 70254992 + min: 12288 + max: 72992224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jnp181n2g + job_id: j7gjkl1e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1686.0 - throughput: 593.1198102016607 + inference_time: 1705.0 + throughput: 586.5102639296188 estimated_peak_memory_range: - min: 643072 - max: 53416880 + min: 0 + max: 51115584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jmg942zw5 + job_id: jz5wm9xmg job_status: Passed torchscript_onnx_ort: - inference_time: 1749.0 - throughput: 571.7552887364208 + inference_time: 1750.0 + throughput: 571.4285714285714 estimated_peak_memory_range: min: 618496 - max: 33899296 + max: 34613760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jqp4w4y8g + job_id: jz57vd695 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.544443Z' + timestamp: '2024-06-08T23:06:04Z' - torchscript_onnx_tflite: - inference_time: 2281.0 - throughput: 438.4042086804033 + inference_time: 2272.0 + throughput: 440.14084507042253 estimated_peak_memory_range: - min: 16384 - max: 2328304 + min: 24576 + max: 1714808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jvgdv4deg + job_id: jlpe4v2v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2384.0 - throughput: 419.46308724832215 + inference_time: 2385.0 + throughput: 419.2872117400419 estimated_peak_memory_range: - min: 634880 - max: 185894144 + min: 622592 + max: 175433648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jvgdv4drg + job_id: jnp1q837g job_status: Passed 
reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.544487Z' + timestamp: '2024-06-08T23:06:02Z' - torchscript_onnx_qnn: - inference_time: 2759.0 - throughput: 362.4501631025734 + inference_time: 2608.0 + throughput: 383.4355828220859 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jnp181n8g + job_id: jmg99488g job_status: Passed torchscript_onnx_ort: - inference_time: 2346.0 - throughput: 426.25745950554136 + inference_time: 2295.0 + throughput: 435.7298474945534 estimated_peak_memory_range: - min: 84422656 - max: 84422656 + min: 54059008 + max: 54059008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: j0px1rl3g + job_id: jqp4jw81p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.544535Z' + timestamp: '2024-06-08T23:06:05Z' diff --git a/qai_hub_models/models/resnext101/README.md b/qai_hub_models/models/resnext101/README.md index dbe49a2a..cf629f9a 100644 --- a/qai_hub_models/models/resnext101/README.md +++ b/qai_hub_models/models/resnext101/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNeXt101 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431) diff --git a/qai_hub_models/models/resnext101/evaluate.py b/qai_hub_models/models/resnext101/evaluate.py new file mode 100644 index 00000000..b34ff213 --- /dev/null +++ b/qai_hub_models/models/resnext101/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.resnext101 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnext101/export.py b/qai_hub_models/models/resnext101/export.py index a0347299..e4b05a65 100644 --- a/qai_hub_models/models/resnext101/export.py +++ b/qai_hub_models/models/resnext101/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnext101/info.yaml b/qai_hub_models/models/resnext101/info.yaml index 40e7612f..3587cabd 100644 --- a/qai_hub_models/models/resnext101/info.yaml +++ b/qai_hub_models/models/resnext101/info.yaml @@ -39,3 +39,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/resnext101/perf.yaml b/qai_hub_models/models/resnext101/perf.yaml index 751c754f..499cef3a 100644 --- a/qai_hub_models/models/resnext101/perf.yaml +++ b/qai_hub_models/models/resnext101/perf.yaml @@ -36,11 +36,11 @@ models: - name: ResNeXt101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 6683.0 - throughput: 149.63339817447255 + inference_time: 6774.0 + throughput: 147.62326542663124 estimated_peak_memory_range: - min: 159744 - max: 2988272 + min: 24576 + max: 2449424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jo5mzkkdp + job_id: j7gjkloe5 job_status: Passed torchscript_onnx_qnn: - inference_time: 6643.0 - throughput: 150.5343971097396 + inference_time: 6930.0 + throughput: 144.3001443001443 estimated_peak_memory_range: min: 16384 - max: 35374192 + max: 36101088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jep2mddr5 + job_id: jz5wm98mg job_status: Passed torchscript_onnx_ort: - inference_time: 6848.0 - throughput: 146.02803738317758 + inference_time: 6834.0 + throughput: 146.3271875914545 estimated_peak_memory_range: - min: 176128 - max: 448765640 + min: 159744 + max: 453366256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jogky00wp + job_id: jz5wm984g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.693251Z' + timestamp: '2024-06-08T23:07:53Z' - torchscript_onnx_tflite: - inference_time: 4736.0 - throughput: 211.14864864864865 + inference_time: 4859.0 + throughput: 205.80366330520684 estimated_peak_memory_range: min: 20480 - max: 362103440 + max: 364879056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jegneqqkg + job_id: jlpe4v8v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4801.0 - throughput: 208.28993959591753 + inference_time: 4800.0 + throughput: 208.33333333333334 estimated_peak_memory_range: min: 0 - max: 124593744 + max: 126702208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jqpyd228p + job_id: jmg994k8g job_status: Passed torchscript_onnx_ort: - inference_time: 5111.0 - throughput: 195.65642731363727 + inference_time: 5102.0 + throughput: 196.0015680125441 estimated_peak_memory_range: - min: 618496 - max: 92380992 + min: 0 + max: 91577616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jn5q211n5 + job_id: jmg994kmg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.693354Z' + timestamp: '2024-06-08T23:07:54Z' - torchscript_onnx_tflite: - inference_time: 6723.0 - throughput: 148.74312063067083 + inference_time: 6604.0 + throughput: 151.42337976983646 estimated_peak_memory_range: - min: 32768 - max: 2943704 + min: 20480 + max: 3255112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: joprydd0g + job_id: jygzv78xp job_status: Passed torchscript_onnx_qnn: - inference_time: 6632.0 - throughput: 150.78407720144753 + inference_time: 6571.0 + throughput: 152.18383807639628 estimated_peak_memory_range: - min: 20480 - max: 36799736 + min: 0 + max: 35912680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j1p87rrk5 + job_id: jvgd7v8zg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.693417Z' + timestamp: '2024-06-08T23:07:52Z' - torchscript_onnx_qnn: - inference_time: 9099.0 - throughput: 109.90218705352237 + inference_time: 9160.0 + throughput: 109.17030567685589 estimated_peak_memory_range: - min: 602112 - max: 602112 + min: 913408 + max: 913408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j2p0r999p + job_id: jnp1q877g job_status: Passed torchscript_onnx_ort: - inference_time: 6742.0 - throughput: 148.3239394838327 + inference_time: 6731.0 + throughput: 148.5663348685188 estimated_peak_memory_range: - min: 109170688 - max: 109170688 + min: 117399552 + max: 117399552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 
+207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: j1glk88jp + job_id: jnp1q87ng job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.693490Z' + timestamp: '2024-06-08T23:07:55Z' diff --git a/qai_hub_models/models/resnext101_quantized/README.md b/qai_hub_models/models/resnext101_quantized/README.md index 756cb042..e91fdd6a 100644 --- a/qai_hub_models/models/resnext101_quantized/README.md +++ b/qai_hub_models/models/resnext101_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNeXt101Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431) diff --git a/qai_hub_models/models/resnext101_quantized/evaluate.py b/qai_hub_models/models/resnext101_quantized/evaluate.py new file mode 100644 index 00000000..9652d8f6 --- /dev/null +++ b/qai_hub_models/models/resnext101_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.resnext101_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnext101_quantized/export.py b/qai_hub_models/models/resnext101_quantized/export.py index ac94e2ad..007bc290 100644 --- a/qai_hub_models/models/resnext101_quantized/export.py +++ b/qai_hub_models/models/resnext101_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnext101_quantized/info.yaml b/qai_hub_models/models/resnext101_quantized/info.yaml index 47a2496c..e1431d09 100644 --- a/qai_hub_models/models/resnext101_quantized/info.yaml +++ b/qai_hub_models/models/resnext101_quantized/info.yaml @@ -41,3 +41,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/resnext101_quantized/perf.yaml b/qai_hub_models/models/resnext101_quantized/perf.yaml index d14cbbc7..1537f3e3 100644 --- a/qai_hub_models/models/resnext101_quantized/perf.yaml +++ b/qai_hub_models/models/resnext101_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: ResNeXt101Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 2913.0 - throughput: 343.2887058015791 + inference_time: 2846.0 + throughput: 351.37034434293747 estimated_peak_memory_range: - min: 16384 - max: 2349024 + min: 28672 + max: 2113784 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: j1p3m773g + job_id: jqp4jwm2p job_status: Passed torchscript_onnx_qnn: - inference_time: 3066.0 - throughput: 326.1578604044357 + inference_time: 3084.0 + throughput: 324.25421530479895 estimated_peak_memory_range: - min: 12288 - max: 34091696 + min: 16384 + max: 35906456 primary_compute_unit: NPU precision: 
int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jygz7y0op + job_id: jep23m46g job_status: Passed torchscript_onnx_ort: - inference_time: 3564.0 - throughput: 280.58361391694723 + inference_time: 3364.0 + throughput: 297.2651605231867 estimated_peak_memory_range: - min: 8192 - max: 136690536 + min: 12288 + max: 140467400 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 154 + layers_on_npu: 151 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 154 - job_id: jz57dn8v5 + total_layers: 151 + job_id: jogkry9v5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.728144Z' + timestamp: '2024-06-08T23:09:57Z' - torchscript_onnx_tflite: - inference_time: 2099.0 - throughput: 476.41734159123394 + inference_time: 2072.0 + throughput: 482.6254826254826 estimated_peak_memory_range: min: 12288 - max: 256604400 + max: 258677904 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jwgovwwq5 + job_id: j0pxe1385 job_status: Passed torchscript_onnx_qnn: - inference_time: 2271.0 - throughput: 440.33465433729634 + inference_time: 2331.0 + throughput: 429.000429000429 estimated_peak_memory_range: min: 12288 - max: 115257056 + max: 119524448 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jz5w9zr3p + job_id: jqpyvdq0p job_status: Passed torchscript_onnx_ort: - inference_time: 2622.0 - throughput: 381.38825324180016 + inference_time: 2469.0 + throughput: 405.0222762251924 estimated_peak_memory_range: - min: 618496 - max: 97094352 + min: 12288 + max: 93879712 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 154 + layers_on_npu: 151 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 154 - job_id: jqp4w428g + total_layers: 151 + job_id: jn5q92mep job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.728227Z' + timestamp: '2024-06-08T23:09:58Z' - torchscript_onnx_tflite: - inference_time: 2843.0 - throughput: 351.74111853675697 + inference_time: 2846.0 + throughput: 351.37034434293747 estimated_peak_memory_range: - min: 28672 - max: 2594968 + min: 16384 + max: 2438744 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: j1pvwmnkg + job_id: jo5mvzo75 job_status: Passed torchscript_onnx_qnn: - inference_time: 3075.0 - throughput: 325.2032520325203 + inference_time: 3060.0 + throughput: 326.797385620915 estimated_peak_memory_range: min: 16384 - max: 35433304 + max: 35555384 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jnp181m8g + job_id: j1p8w74qp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.728279Z' + timestamp: '2024-06-08T23:09:56Z' - torchscript_onnx_tflite: - inference_time: 10225.0 - throughput: 97.79951100244499 + inference_time: 10195.0 + throughput: 98.0872976949485 estimated_peak_memory_range: - min: 12288 - max: 
194683600 + min: 53248 + max: 195935712 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: j7gjly8vp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 10708.0 - throughput: 93.38812103100486 - estimated_peak_memory_range: - min: 0 - max: 124741440 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 146 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 146 - job_id: jvgdv4mrg + job_id: jegnreoj5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:42.728331Z' + timestamp: '2024-06-08T23:09:51Z' - torchscript_onnx_tflite: - inference_time: 132850.0 - throughput: 7.527286413248024 + inference_time: 131262.0 + throughput: 7.618351084091359 estimated_peak_memory_range: min: 12288 - max: 357543064 + max: 356618752 primary_compute_unit: GPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 125 layers_on_cpu: 11 total_layers: 148 - job_id: jlpevxno5 + job_id: jopr1yokg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:42.728360Z' + timestamp: '2024-06-08T23:09:52Z' - torchscript_onnx_qnn: - inference_time: 3362.0 - throughput: 297.441998810232 + inference_time: 3311.0 + throughput: 302.0235578375113 estimated_peak_memory_range: - min: 253952 - max: 253952 + min: 262144 + max: 262144 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jmg942qw5 + job_id: j2p0erv05 job_status: Passed torchscript_onnx_ort: - inference_time: 3382.0 - throughput: 295.68302779420463 + inference_time: 3294.0 + throughput: 303.58227079538557 estimated_peak_memory_range: - min: 137187328 - max: 137187328 + min: 12066816 + max: 12066816 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 154 + layers_on_npu: 151 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 154 - job_id: j0px1rz3g + total_layers: 151 + job_id: j1glek12p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.728413Z' + timestamp: '2024-06-08T23:09:59Z' diff --git a/qai_hub_models/models/resnext50/README.md b/qai_hub_models/models/resnext50/README.md index 57bd0206..68e67be5 100644 --- a/qai_hub_models/models/resnext50/README.md +++ b/qai_hub_models/models/resnext50/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNeXt50 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431) diff --git a/qai_hub_models/models/resnext50/evaluate.py b/qai_hub_models/models/resnext50/evaluate.py new file mode 100644 index 00000000..50d09632 --- /dev/null +++ b/qai_hub_models/models/resnext50/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.resnext50 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnext50/export.py b/qai_hub_models/models/resnext50/export.py index b30842ef..ecbf5998 100644 --- a/qai_hub_models/models/resnext50/export.py +++ b/qai_hub_models/models/resnext50/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnext50/info.yaml b/qai_hub_models/models/resnext50/info.yaml index 9ba350da..1270dfa3 100644 --- a/qai_hub_models/models/resnext50/info.yaml +++ b/qai_hub_models/models/resnext50/info.yaml @@ -39,3 +39,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/resnext50/perf.yaml b/qai_hub_models/models/resnext50/perf.yaml index fe0146fc..6ce25b12 100644 --- a/qai_hub_models/models/resnext50/perf.yaml +++ b/qai_hub_models/models/resnext50/perf.yaml @@ -36,11 +36,11 @@ models: - name: ResNeXt50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2499.0 - throughput: 400.16006402561027 + inference_time: 2511.0 + throughput: 398.24771007566704 estimated_peak_memory_range: - min: 
16384 - max: 2423408 + min: 12288 + max: 2265792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jegneqwkg + job_id: j1p3qmwm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2534.0 - throughput: 394.6329913180742 + inference_time: 2553.0 + throughput: 391.6960438699569 estimated_peak_memory_range: - min: 12288 - max: 98425656 + min: 57344 + max: 21403728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jqpyd2y8p + job_id: j7gjklw15 job_status: Passed torchscript_onnx_ort: - inference_time: 2810.0 - throughput: 355.87188612099646 + inference_time: 2768.0 + throughput: 361.271676300578 estimated_peak_memory_range: - min: 36864 - max: 126928704 + min: 16384 + max: 171552072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jn5q21dn5 + job_id: jmg994xmg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.771651Z' + timestamp: '2024-06-08T23:10:26Z' - torchscript_onnx_tflite: - inference_time: 1788.0 - throughput: 559.2841163310962 + inference_time: 1800.0 + throughput: 555.5555555555555 estimated_peak_memory_range: min: 16384 - max: 163487424 + max: 163995360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jopryd70g + job_id: jwgoev41p job_status: Passed torchscript_onnx_qnn: - inference_time: 1875.0 - throughput: 533.3333333333334 + inference_time: 1878.0 + throughput: 532.4813631522896 estimated_peak_memory_range: - min: 618496 - max: 60135536 + min: 0 + max: 60231440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j2p0r9x9p + job_id: jlpe4vl85 job_status: Passed torchscript_onnx_ort: - inference_time: 1970.0 - throughput: 507.61421319796955 + inference_time: 1929.0 + throughput: 518.4033177812338 estimated_peak_memory_range: min: 618496 - max: 38894176 + max: 41928304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: j1glk8qjp + job_id: jnp1q8vng job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.771721Z' + timestamp: '2024-06-08T23:10:27Z' - torchscript_onnx_tflite: - inference_time: 2508.0 - throughput: 398.72408293460927 + inference_time: 2498.0 + throughput: 400.320256204964 estimated_peak_memory_range: - min: 24576 - max: 2263136 + min: 20480 + max: 2219560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jep2mdzr5 + job_id: j1pvzw9zg job_status: Passed torchscript_onnx_qnn: - inference_time: 2577.0 - throughput: 388.04811796662784 + inference_time: 2553.0 + throughput: 391.6960438699569 estimated_peak_memory_range: - min: 622592 - max: 88271880 + min: 20480 + max: 88251120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jogky0kwp + 
job_id: jz5wm914g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.771766Z' + timestamp: '2024-06-08T23:10:25Z' - torchscript_onnx_qnn: - inference_time: 2906.0 - throughput: 344.1156228492774 + inference_time: 2941.0 + throughput: 340.02040122407345 estimated_peak_memory_range: - min: 602112 - max: 602112 + min: 1044480 + max: 1044480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j1p87rkk5 + job_id: jygzv744p job_status: Passed torchscript_onnx_ort: - inference_time: 2653.0 - throughput: 376.9317753486619 + inference_time: 2624.0 + throughput: 381.0975609756098 estimated_peak_memory_range: - min: 65294336 - max: 65294336 + min: 46874624 + max: 46874624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jw561m06p + job_id: jvgd7vz6g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.771815Z' + timestamp: '2024-06-08T23:10:28Z' diff --git a/qai_hub_models/models/resnext50_quantized/README.md b/qai_hub_models/models/resnext50_quantized/README.md index 9950b426..3ce0b330 100644 --- a/qai_hub_models/models/resnext50_quantized/README.md +++ b/qai_hub_models/models/resnext50_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNeXt50Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431) diff --git a/qai_hub_models/models/resnext50_quantized/evaluate.py b/qai_hub_models/models/resnext50_quantized/evaluate.py new file mode 100644 index 00000000..1eb23114 --- /dev/null +++ b/qai_hub_models/models/resnext50_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.resnext50_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnext50_quantized/export.py b/qai_hub_models/models/resnext50_quantized/export.py index 42967b4b..73b7aa57 100644 --- a/qai_hub_models/models/resnext50_quantized/export.py +++ b/qai_hub_models/models/resnext50_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnext50_quantized/info.yaml b/qai_hub_models/models/resnext50_quantized/info.yaml index 69be8e37..1b2c5291 100644 --- a/qai_hub_models/models/resnext50_quantized/info.yaml +++ b/qai_hub_models/models/resnext50_quantized/info.yaml @@ -41,3 +41,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/resnext50_quantized/perf.yaml b/qai_hub_models/models/resnext50_quantized/perf.yaml index be21a304..b5dae635 100644 --- a/qai_hub_models/models/resnext50_quantized/perf.yaml +++ b/qai_hub_models/models/resnext50_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: ResNeXt50Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 941.0 - throughput: 1062.6992561105208 + inference_time: 943.0 + throughput: 1060.4453870625662 estimated_peak_memory_range: - min: 24576 - max: 1725856 + min: 32768 + max: 1732496 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jwgovw9q5 + job_id: jqp4jw92p job_status: Passed torchscript_onnx_qnn: - inference_time: 1174.0 - throughput: 851.7887563884157 + inference_time: 1179.0 + throughput: 848.1764206955047 estimated_peak_memory_range: - min: 0 - max: 10549448 + min: 20480 + max: 66746984 primary_compute_unit: NPU precision: int8 layer_info: @@ 
-69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jz5w9z03p + job_id: jep23mv6g job_status: Passed torchscript_onnx_ort: - inference_time: 1535.0 - throughput: 651.4657980456026 + inference_time: 1353.0 + throughput: 739.0983000739099 estimated_peak_memory_range: - min: 12288 - max: 88731520 + min: 28672 + max: 79646016 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 86 + layers_on_npu: 83 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 86 - job_id: jmg942785 + total_layers: 83 + job_id: jn5q92vep job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.806370Z' + timestamp: '2024-06-08T23:11:25Z' - torchscript_onnx_tflite: - inference_time: 720.0 - throughput: 1388.888888888889 + inference_time: 710.0 + throughput: 1408.4507042253522 estimated_peak_memory_range: - min: 12288 - max: 100104144 + min: 1523712 + max: 101683104 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: j1pvwmykg + job_id: j0pxe1d85 job_status: Passed torchscript_onnx_qnn: - inference_time: 875.0 - throughput: 1142.857142857143 + inference_time: 873.0 + throughput: 1145.475372279496 estimated_peak_memory_range: - min: 167936 - max: 53235392 + min: 163840 + max: 57724624 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jmg9427w5 + job_id: jqpyvd70p job_status: Passed torchscript_onnx_ort: - inference_time: 1140.0 - throughput: 877.1929824561404 + inference_time: 991.0 + throughput: 1009.0817356205853 estimated_peak_memory_range: - min: 618496 - max: 42079072 + min: 28672 + max: 41643216 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 86 + layers_on_npu: 83 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 86 - job_id: jnp181k7g + total_layers: 83 + job_id: j1glekl2p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.806430Z' + timestamp: '2024-06-08T23:11:26Z' - torchscript_onnx_tflite: - inference_time: 939.0 - throughput: 1064.9627263045793 + inference_time: 944.0 + throughput: 1059.322033898305 estimated_peak_memory_range: min: 12288 - max: 1827928 + max: 2151184 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: j7gjly6vp + job_id: jo5mvzd75 job_status: Passed torchscript_onnx_qnn: - inference_time: 1172.0 - throughput: 853.2423208191126 + inference_time: 1183.0 + throughput: 845.30853761623 estimated_peak_memory_range: - min: 172032 - max: 10872072 + min: 16384 + max: 66707936 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jvgdv4yrg + job_id: jogkry8v5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.806470Z' + timestamp: '2024-06-08T23:11:24Z' - torchscript_onnx_tflite: - inference_time: 3073.0 - throughput: 325.4149040026033 + inference_time: 3287.0 + throughput: 304.228780042592 estimated_peak_memory_range: - min: 0 - max: 54569392 + min: 
12288 + max: 55813072 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jlpevx0o5 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 3634.0 - throughput: 275.178866263071 - estimated_peak_memory_range: - min: 163840 - max: 53528640 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 78 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 78 - job_id: jz5w9z0mp + job_id: jegnre7j5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:42.806508Z' + timestamp: '2024-06-08T23:11:19Z' - torchscript_onnx_tflite: - inference_time: 63166.0 - throughput: 15.831301649621633 + inference_time: 64039.0 + throughput: 15.615484314246006 estimated_peak_memory_range: - min: 0 - max: 98697840 + min: 868352 + max: 98172464 primary_compute_unit: GPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 57 layers_on_cpu: 11 total_layers: 80 - job_id: jygz7yqop + job_id: jopr1ynkg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:42.806531Z' + timestamp: '2024-06-08T23:11:20Z' - torchscript_onnx_qnn: - inference_time: 1357.0 - throughput: 736.9196757553427 + inference_time: 1350.0 + throughput: 740.7407407407408 estimated_peak_memory_range: - min: 413696 - max: 413696 + min: 1429504 + max: 1429504 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jnp181k8g + job_id: j2p0er605 job_status: Passed torchscript_onnx_ort: - inference_time: 1395.0 - throughput: 716.8458781362007 + inference_time: 1264.0 + throughput: 791.1392405063291 estimated_peak_memory_range: - min: 52191232 - max: 52191232 + min: 24887296 + max: 24887296 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 86 + layers_on_npu: 83 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 86 - job_id: jvgdv4yzg + total_layers: 83 + job_id: jw56q1wng job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.806571Z' + timestamp: '2024-06-08T23:11:27Z' diff --git a/qai_hub_models/models/riffusion_quantized/README.md b/qai_hub_models/models/riffusion_quantized/README.md index 69d0e0ef..9c5b1a50 100644 --- a/qai_hub_models/models/riffusion_quantized/README.md +++ b/qai_hub_models/models/riffusion_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Riffusion can be found [here](https://github.com/CompVis/stable-diffusion/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/CompVis/stable-diffusion/blob/main/LICENSE) ## References * [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) diff --git a/qai_hub_models/models/sam/README.md b/qai_hub_models/models/sam/README.md index e4bc8748..a0ba93db 100644 --- a/qai_hub_models/models/sam/README.md +++ b/qai_hub_models/models/sam/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Segment-Anything-Model can be found [here](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Segment Anything](https://arxiv.org/abs/2304.02643) diff --git a/qai_hub_models/models/sam/export.py b/qai_hub_models/models/sam/export.py index 1492c33b..962a8058 100644 --- a/qai_hub_models/models/sam/export.py +++ b/qai_hub_models/models/sam/export.py @@ -204,7 +204,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -251,6 +251,7 @@ def main(): components=ALL_COMPONENTS, supports_qnn=False, supports_ort=False, + supports_precompiled_ort=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/sam/perf.yaml b/qai_hub_models/models/sam/perf.yaml index 515dc0f5..370ec409 100644 --- a/qai_hub_models/models/sam/perf.yaml +++ b/qai_hub_models/models/sam/perf.yaml @@ -36,19 +36,19 @@ models: - name: SAMDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 48828.0 - throughput: 20.48005242893422 + inference_time: 48230.0 + throughput: 20.733982998133943 estimated_peak_memory_range: - min: 4276224 - max: 12829288 + min: 4026368 + max: 7727688 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 342 + layers_on_npu: 340 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 342 - job_id: jqpyd2klp + total_layers: 340 + job_id: j7gjklq15 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -57,21 +57,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.850316Z' + timestamp: '2024-06-08T23:12:47Z' - torchscript_onnx_tflite: - inference_time: 35208.0 - throughput: 28.402635764598955 + inference_time: 34548.0 + throughput: 28.9452356142179 estimated_peak_memory_range: - min: 2613248 - max: 249349152 + min: 12288 + max: 245149360 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 342 + layers_on_npu: 340 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 342 - job_id: j1p87rdo5 + total_layers: 340 + job_id: jygzv7n4p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -80,21 +80,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.850373Z' + 
timestamp: '2024-06-08T23:12:49Z' - torchscript_onnx_tflite: - inference_time: 48966.0 - throughput: 20.422333864314012 + inference_time: 48060.0 + throughput: 20.807324178110694 estimated_peak_memory_range: - min: 12288 - max: 8203928 + min: 4009984 + max: 12530416 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 342 + layers_on_npu: 340 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 342 - job_id: jn5q21xo5 + total_layers: 340 + job_id: jmg994dmg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -103,23 +103,15 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.850426Z' - - reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows - manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.850433Z' + timestamp: '2024-06-08T23:12:50Z' - name: SAMEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 11078146.0 - throughput: 0.0902678119605934 + inference_time: 12009970.0 + throughput: 0.08326415469813829 estimated_peak_memory_range: - min: 2718310400 - max: 2721580152 + min: 2723000320 + max: 2727292856 primary_compute_unit: CPU precision: fp32 layer_info: @@ -127,7 +119,7 @@ models: layers_on_gpu: 37 layers_on_cpu: 771 total_layers: 808 - job_id: j2p0r98np + job_id: jlpe4vy85 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -136,13 +128,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.850531Z' + timestamp: '2024-06-08T23:12:47Z' - torchscript_onnx_tflite: - inference_time: 10431133.0 - throughput: 0.09586686316817167 + inference_time: 9639117.0 + throughput: 0.10374394252087614 estimated_peak_memory_range: - min: 2547875840 - max: 2907987472 + min: 2582843392 + max: 2946188672 primary_compute_unit: CPU precision: fp32 layer_info: @@ -150,7 +142,7 @@ models: layers_on_gpu: 37 layers_on_cpu: 771 total_layers: 808 - job_id: jogky0wnp + job_id: jz5wm944g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -159,13 +151,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.850628Z' + timestamp: '2024-06-08T23:12:49Z' - torchscript_onnx_tflite: - inference_time: 11464171.0 - throughput: 0.08722828715656807 + inference_time: 11285658.0 + throughput: 0.08860803685527242 estimated_peak_memory_range: - min: 2717503488 - max: 2721696880 + min: 2642145280 + max: 2645812336 primary_compute_unit: CPU precision: fp32 layer_info: @@ -173,7 +165,7 @@ models: layers_on_gpu: 37 layers_on_cpu: 771 total_layers: 808 - job_id: j1glk8dmp + job_id: jnp1q86ng job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -182,12 +174,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.850725Z' - - reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows - manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.850731Z' + timestamp: '2024-06-08T23:12:51Z' diff --git a/qai_hub_models/models/sesr_m5/README.md b/qai_hub_models/models/sesr_m5/README.md index eb36ea36..9cec4f6c 100644 --- a/qai_hub_models/models/sesr_m5/README.md +++ b/qai_hub_models/models/sesr_m5/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. 
 ## License
 - The license for the original implementation of SESR-M5 can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf).
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url})
+- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf)

 ## References
 * [Collapsible Linear Blocks for Super-Efficient Super Resolution](https://arxiv.org/abs/2103.09404)
diff --git a/qai_hub_models/models/sesr_m5/demo.py b/qai_hub_models/models/sesr_m5/demo.py
index 312bbab4..51607bda 100644
--- a/qai_hub_models/models/sesr_m5/demo.py
+++ b/qai_hub_models/models/sesr_m5/demo.py
@@ -3,12 +3,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # ---------------------------------------------------------------------
 from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo
-from qai_hub_models.models.sesr_m5.model import MODEL_ASSET_VERSION, MODEL_ID, SESR_M5
-from qai_hub_models.utils.asset_loaders import CachedWebModelAsset
-
-IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store(
-    MODEL_ID, MODEL_ASSET_VERSION, "sesr_m5_demo.jpg"
-)
+from qai_hub_models.models.sesr_m5.model import MODEL_ID, SESR_M5


 # Run QuickSRNet end-to-end on a sample image.
@@ -17,7 +12,6 @@ def main(is_test: bool = False):
     super_resolution_demo(
         model_cls=SESR_M5,
         model_id=MODEL_ID,
-        default_image=IMAGE_ADDRESS,
         is_test=is_test,
     )
diff --git a/qai_hub_models/models/sesr_m5/export.py b/qai_hub_models/models/sesr_m5/export.py
index 07e5a42c..de36e9d6 100644
--- a/qai_hub_models/models/sesr_m5/export.py
+++ b/qai_hub_models/models/sesr_m5/export.py
@@ -186,7 +186,7 @@ def export_model(
         target_runtime_extension = "so"
     elif target_runtime == TargetRuntime.TFLITE:
         target_runtime_extension = "tflite"
-    elif target_runtime == TargetRuntime.ORT:
+    elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}:
         target_runtime_extension = "onnx"

     os.makedirs(output_path, exist_ok=True)
diff --git a/qai_hub_models/models/sesr_m5/info.yaml b/qai_hub_models/models/sesr_m5/info.yaml
index 06f7dae2..f6715c77 100644
--- a/qai_hub_models/models/sesr_m5/info.yaml
+++ b/qai_hub_models/models/sesr_m5/info.yaml
@@ -10,13 +10,14 @@ tags: []
 research_paper: https://arxiv.org/abs/2103.09404
 research_paper_title: Collapsible Linear Blocks for Super-Efficient Super Resolution
 license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf
-deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf
+deploy_license:
+  https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf
 source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/sesr
 technical_details:
-  Model checkpoint: sesr_m5_4x_checkpoint_float32
-  Input resolution: 128x128
-  Number of parameters: 343K
-  Model size: 1.32 MB
+  Model checkpoint: sesr_m5_3x_checkpoint
+  Input resolution: 640x360
+  Number of parameters: 338K
+  Model size: 1.30 MB
 applicable_scenarios:
 - Virtual Real Estate Tours
 - Gaming
diff --git a/qai_hub_models/models/sesr_m5/model.py b/qai_hub_models/models/sesr_m5/model.py
index c7283ab9..b8aa863e 100644
--- a/qai_hub_models/models/sesr_m5/model.py
+++ b/qai_hub_models/models/sesr_m5/model.py
@@ -4,81 +4,43 @@
 # ---------------------------------------------------------------------
 from __future__ import annotations

-import torch
+from pathlib import Path

-from qai_hub_models.evaluators.base_evaluators import BaseEvaluator
-from qai_hub_models.evaluators.superres_evaluator import SuperResolutionOutputEvaluator
 from qai_hub_models.models._shared.sesr.common import _load_sesr_source_model
-from qai_hub_models.utils.asset_loaders import CachedWebModelAsset
-from qai_hub_models.utils.base_model import BaseModel
-from qai_hub_models.utils.input_spec import InputSpec
+from qai_hub_models.models._shared.super_resolution.model import (
+    DEFAULT_SCALE_FACTOR,
+    SuperResolutionModel,
+    validate_scale_factor,
+)
+from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_torch

 MODEL_ID = __name__.split(".")[-2]
 MODEL_ASSET_VERSION = 3
-# Weights and config stored in S3 are sourced from
-# https://github.com/quic/aimet-model-zoo/blob/develop/aimet_zoo_torch/sesr/model/model_cards/sesr_m5_2x_w8a8.json
-# https://github.com/quic/aimet-model-zoo/releases/download/phase_2_february_artifacts/sesr_m5_2x_checkpoint_float32.pth.tar
-SESR_WEIGHTS = "sesr_m5_4x_checkpoint_float32.pth.tar"
-SCALING_FACTOR = 4
+BASE_ASSET_URL = "https://github.com/quic/aimet-model-zoo/releases/download/phase_2_february_artifacts/sesr_m5_{scale_factor}x_checkpoint_float32.pth.tar"
 NUM_CHANNELS = 16
 NUM_LBLOCKS = 5


-class SESR_M5(BaseModel):
-    """Exportable SESR M5 super resolution model, end-to-end."""
-
-    def __init__(
-        self,
-        sesr_model: torch.nn.Module,
-    ) -> None:
-        super().__init__()
-        self.model = sesr_model
+class SESR_M5(SuperResolutionModel):
+    """Exportable SESR_M5 super resolution model, end-to-end."""

     @classmethod
-    def from_pretrained(cls) -> SESR_M5:
+    def from_pretrained(cls, scale_factor: int = DEFAULT_SCALE_FACTOR) -> SESR_M5:
+        validate_scale_factor(scale_factor)
         model = _load_sesr_source_model(
-            SCALING_FACTOR,
+            scale_factor,
             NUM_CHANNELS,
             NUM_LBLOCKS,
         )
-        dst = CachedWebModelAsset.from_asset_store(
-            MODEL_ID, MODEL_ASSET_VERSION, SESR_WEIGHTS
-        ).fetch()
-        checkpoint = torch.load(dst, map_location=torch.device("cpu"))
+        url = BASE_ASSET_URL.format(scale_factor=scale_factor)
+        checkpoint_asset = CachedWebModelAsset(
+            url,
+            MODEL_ID,
+            MODEL_ASSET_VERSION,
+            Path(url).name,
+        )
+        checkpoint = load_torch(checkpoint_asset)
         model.load_state_dict(checkpoint["state_dict"])
         model.eval()
-        return cls(model)
-
-    def get_evaluator(self) -> BaseEvaluator:
-        return SuperResolutionOutputEvaluator()
-
-    def forward(self, image: torch.Tensor) -> torch.Tensor:
-        """
-        Run SESR M5 on `image`, and produce an upscaled image
-
-        Parameters:
-            image: Pixel values pre-processed for model consumption.
-                   Range: float[0, 1]
-                   3-channel Color Space: RGB
-
-        Returns:
-            image: Pixel values
-                   Range: float[0, 1]
-                   3-channel Color Space: RGB
-        """
-
-        return self.model(image)
-
-    @staticmethod
-    def get_input_spec(
-        batch_size: int = 1,
-        num_channels: int = 3,
-        height: int = 128,
-        width: int = 128,
-    ) -> InputSpec:
-        # Get the input specification ordered (name -> (shape, type)) pairs for this model.
-        #
-        # This can be used with the qai_hub python API to declare
-        # the model input specification upon submitting a profile job.
- return {"image": ((batch_size, num_channels, height, width), "float32")} + return cls(model, scale_factor) diff --git a/qai_hub_models/models/sesr_m5/perf.yaml b/qai_hub_models/models/sesr_m5/perf.yaml index 22d99c5a..04e56cbe 100644 --- a/qai_hub_models/models/sesr_m5/perf.yaml +++ b/qai_hub_models/models/sesr_m5/perf.yaml @@ -36,11 +36,11 @@ models: - name: SESR-M5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2218.0 - throughput: 450.8566275924256 + inference_time: 2201.0 + throughput: 454.3389368468878 estimated_peak_memory_range: - min: 20480 - max: 2072008 + min: 16384 + max: 2206696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jmg942r85 + job_id: j1gle1ymp job_status: Passed torchscript_onnx_qnn: - inference_time: 2126.0 - throughput: 470.36688617121354 + inference_time: 2133.0 + throughput: 468.8232536333802 estimated_peak_memory_range: - min: 16384 - max: 7778080 + min: 2113536 + max: 6868544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jz57dnm95 + job_id: jwgoe4lkp job_status: Passed torchscript_onnx_ort: - inference_time: 2863.0 - throughput: 349.28396786587496 + inference_time: 2875.0 + throughput: 347.82608695652175 estimated_peak_memory_range: - min: 24576 - max: 5828608 + min: 12288 + max: 6151368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jegneq4qg + job_id: jygzv4lxp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.879776Z' + timestamp: '2024-06-11T11:58:43Z' - torchscript_onnx_tflite: inference_time: 1621.0 throughput: 616.9031462060457 estimated_peak_memory_range: min: 16384 - max: 24958624 + max: 25573456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jnp18197g + job_id: jw56qd8yg job_status: Passed torchscript_onnx_qnn: - inference_time: 1468.0 - throughput: 681.1989100817439 + inference_time: 1460.0 + throughput: 684.931506849315 estimated_peak_memory_range: - min: 0 - max: 21931168 + min: 204800 + max: 26892880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jqp4w471g + job_id: j1pvz9lrg job_status: Passed torchscript_onnx_ort: - inference_time: 1906.0 - throughput: 524.6589716684156 + inference_time: 1954.0 + throughput: 511.77072671443193 estimated_peak_memory_range: - min: 208896 - max: 17848208 + min: 212992 + max: 20764320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: joprydr7g + job_id: jz5wm1lmg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.879821Z' + timestamp: '2024-06-11T11:58:43Z' - torchscript_onnx_tflite: - inference_time: 2280.0 - throughput: 438.5964912280702 + inference_time: 2290.0 + throughput: 436.68122270742356 estimated_peak_memory_range: - min: 24576 - max: 1992472 + min: 28672 + max: 8571536 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jvgdv4kzg + job_id: j1p3qwzn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2143.0 - throughput: 466.63555762949136 + inference_time: 2135.0 + throughput: 468.384074941452 estimated_peak_memory_range: - min: 217088 - max: 4245840 + min: 16384 + max: 9688296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jo5mzk79p + job_id: jlpe4l7v5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.879850Z' + timestamp: '2024-06-11T11:58:42Z' - torchscript_onnx_qnn: - inference_time: 2938.0 - throughput: 340.3675970047652 + inference_time: 2971.0 + throughput: 336.58700774150117 estimated_peak_memory_range: - min: 245760 - max: 245760 + min: 221184 + max: 221184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j0px1rqlg + job_id: j7gjkwre5 job_status: Passed torchscript_onnx_ort: - inference_time: 2937.0 - throughput: 340.4834865509023 + inference_time: 2911.0 + throughput: 343.52456200618343 estimated_peak_memory_range: - min: 12759040 - max: 12759040 + min: 12976128 + max: 12976128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jep2md1q5 + job_id: jmg99xz8g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.879880Z' + timestamp: '2024-06-11T11:58:44Z' diff --git a/qai_hub_models/models/sesr_m5/test.py b/qai_hub_models/models/sesr_m5/test.py index e59f48d4..e203aa30 100644 --- a/qai_hub_models/models/sesr_m5/test.py +++ b/qai_hub_models/models/sesr_m5/test.py @@ -5,7 +5,7 @@ import numpy as np from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models.sesr_m5.demo import IMAGE_ADDRESS +from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS from qai_hub_models.models.sesr_m5.demo import main as demo_main from qai_hub_models.models.sesr_m5.model import MODEL_ASSET_VERSION, MODEL_ID, SESR_M5 from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image @@ -23,9 +23,6 @@ def test_task(): model = SESR_M5.from_pretrained() app = SuperResolutionApp(model=model) output_img = app.upscale_image(image)[0] - - output_img.save("/local/mnt/workspace/sesr_m5_output.png") - expected_output_image = load_image(OUTPUT_IMAGE_ADDRESS) assert_most_same( np.asarray(expected_output_image, dtype=np.float32), diff --git a/qai_hub_models/models/sesr_m5_quantized/README.md b/qai_hub_models/models/sesr_m5_quantized/README.md index 57bc9490..f8346830 100644 --- a/qai_hub_models/models/sesr_m5_quantized/README.md +++ b/qai_hub_models/models/sesr_m5_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of SESR-M5-Quantized can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Collapsible Linear Blocks for Super-Efficient Super Resolution](https://arxiv.org/abs/2103.09404) diff --git a/qai_hub_models/models/sesr_m5_quantized/demo.py b/qai_hub_models/models/sesr_m5_quantized/demo.py index 4d063bdd..9c5c43c0 100644 --- a/qai_hub_models/models/sesr_m5_quantized/demo.py +++ b/qai_hub_models/models/sesr_m5_quantized/demo.py @@ -3,26 +3,14 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo -from qai_hub_models.models.sesr_m5_quantized.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, - SESR_M5Quantizable, -) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import TargetRuntime - -IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "sesr_m5_quantized_demo.jpg" -) +from qai_hub_models.models.sesr_m5_quantized.model import MODEL_ID, SESR_M5Quantizable def main(is_test: bool = False): super_resolution_demo( SESR_M5Quantizable, MODEL_ID, - default_image=IMAGE_ADDRESS, is_test=is_test, - available_target_runtimes=[TargetRuntime.TFLITE], ) diff --git a/qai_hub_models/models/sesr_m5_quantized/export.py b/qai_hub_models/models/sesr_m5_quantized/export.py index 98ccc38e..770f96a9 100644 --- a/qai_hub_models/models/sesr_m5_quantized/export.py +++ b/qai_hub_models/models/sesr_m5_quantized/export.py @@ -30,6 +30,7 @@ can_access_qualcomm_ai_hub, export_without_hub_access, transpose_channel_first_to_last, + transpose_channel_last_to_first, ) from qai_hub_models.utils.qnn_helpers import get_qnn_inputs @@ -124,7 +125,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( - " --force_channel_last_input image" + " --force_channel_last_input image" + " --force_channel_last_output output_0" if target_runtime != TargetRuntime.ORT else "" ) @@ -193,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -212,6 +213,14 @@ def export_model( torch_out = torch_inference(model, sample_inputs) assert inference_job is not None and inference_job.wait().success inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + # Convert outputs from channel last to channel first + inference_result = ( + inference_result + if target_runtime == TargetRuntime.ORT + else transpose_channel_last_to_first( + "output_0", inference_result, target_runtime + ) + ) print_inference_metrics(inference_job, inference_result, torch_out) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/sesr_m5_quantized/info.yaml b/qai_hub_models/models/sesr_m5_quantized/info.yaml index f62e37c6..1852e48b 100644 --- a/qai_hub_models/models/sesr_m5_quantized/info.yaml +++ b/qai_hub_models/models/sesr_m5_quantized/info.yaml @@ -10,13 +10,14 @@ 
tags: [quantized] research_paper: https://arxiv.org/abs/2103.09404 research_paper_title: Collapsible Linear Blocks for Super-Efficient Super Resolution license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf -deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/sesr technical_details: - Model checkpoint: sesr_m5_4x_checkpoint_int8 - Input resolution: 128x128 - Number of parameters: 32.3K - Model size: 45.9 KB + Model checkpoint: sesr_m5_4x_checkpoint + Input resolution: 640x360 + Number of parameters: 338K + Model size: 389 KB applicable_scenarios: - Virtual Real Estate Tours - Gaming diff --git a/qai_hub_models/models/sesr_m5_quantized/model.py b/qai_hub_models/models/sesr_m5_quantized/model.py index de5c875e..2d6a806d 100644 --- a/qai_hub_models/models/sesr_m5_quantized/model.py +++ b/qai_hub_models/models/sesr_m5_quantized/model.py @@ -17,34 +17,21 @@ from aimet_torch.model_preparer import prepare_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim -from qai_hub_models.models._shared.sesr.common import _load_sesr_source_model -from qai_hub_models.models.sesr_m5.model import ( - NUM_CHANNELS, - NUM_LBLOCKS, - SCALING_FACTOR, - SESR_M5, -) -from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config +from qai_hub_models.models._shared.super_resolution.model import DEFAULT_SCALE_FACTOR +from qai_hub_models.models.sesr_m5.model import SESR_M5 +from qai_hub_models.utils.aimet.config_loader import get_default_per_tensor_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset from qai_hub_models.utils.quantization_aimet import ( constrain_quantized_inputs_to_image_range, ) MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 3 - -# Weights and config stored in S3 are sourced from -# https://github.com/quic/aimet-model-zoo/blob/develop/aimet_zoo_torch/sesr/model/model_cards/sesr_m5_4x_w8a8.json: -# https://github.com/quic/aimet-model-zoo/releases/download/phase_2_january_artifacts/sesr_m5_4x_checkpoint_int8.pth -# and -# https://raw.githubusercontent.com/quic/aimet/release-aimet-1.23/TrainingExtensions/common/src/python/aimet_common/quantsim_config/default_config_per_channel.js -# Encodings were generated with AIMET QuantSim library -QUANTIZED_WEIGHTS = "sesr_m5_4x_checkpoint_int8.pth" -AIMET_ENCODINGS = "sesr_m5_quantized_encodings.json" +MODEL_ASSET_VERSION = 5 +DEFAULT_ENCODINGS = "sesr_m5_quantized_encodings.json" class SESR_M5Quantizable(AIMETQuantizableMixin, SESR_M5): - """QuickSRNetLarge with post train quantization support. + """SESR_M5 with post train quantization support. Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. 
Support for quantizing using your own weights & data will come at a later date.""" @@ -52,38 +39,29 @@ class SESR_M5Quantizable(AIMETQuantizableMixin, SESR_M5): def __init__( self, sesr_model: QuantizationSimModel, + scale_factor: int, ) -> None: - SESR_M5.__init__(self, sesr_model.model) + SESR_M5.__init__(self, sesr_model.model, scale_factor) AIMETQuantizableMixin.__init__(self, sesr_model) @classmethod def from_pretrained( cls, aimet_encodings: str | None = "DEFAULT", + scale_factor: int = DEFAULT_SCALE_FACTOR, ) -> SESR_M5Quantizable: # Load Model - sesr = _load_sesr_source_model(SCALING_FACTOR, NUM_CHANNELS, NUM_LBLOCKS) - # The model is collapsed pre-quantization - see - # https://github.com/quic/aimet-model-zoo/blob/d09d2b0404d10f71a7640a87e9d5e5257b028802/aimet_zoo_torch/common/super_resolution/models.py#L110 - sesr.collapse() + sesr = SESR_M5.from_pretrained(scale_factor) input_shape = SESR_M5.get_input_spec()["image"][0] sesr = prepare_model(sesr) equalize_model(sesr, input_shape) - # Download weights and quantization parameters - weights = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, QUANTIZED_WEIGHTS - ).fetch() - - # Load the model weights and quantization parameters - state_dict = torch.load(weights, map_location=torch.device("cpu"))["state_dict"] - sesr.load_state_dict(state_dict) sim = QuantizationSimModel( sesr, quant_scheme="tf_enhanced", default_param_bw=8, default_output_bw=8, - config_file=get_default_aimet_config(), + config_file=get_default_per_tensor_aimet_config(), dummy_input=torch.rand(input_shape), ) constrain_quantized_inputs_to_image_range(sim) @@ -91,10 +69,10 @@ def from_pretrained( if aimet_encodings: if aimet_encodings == "DEFAULT": aimet_encodings = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, AIMET_ENCODINGS + MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS ).fetch() load_encodings_to_sim(sim, aimet_encodings) sim.model.eval() - return cls(sim) + return cls(sim, scale_factor) diff --git a/qai_hub_models/models/sesr_m5_quantized/perf.yaml b/qai_hub_models/models/sesr_m5_quantized/perf.yaml index 0140a83e..617858a7 100644 --- a/qai_hub_models/models/sesr_m5_quantized/perf.yaml +++ b/qai_hub_models/models/sesr_m5_quantized/perf.yaml @@ -42,49 +42,49 @@ models: - name: SESR-M5-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1320.0 - throughput: 757.5757575757576 + inference_time: 1220.0 + throughput: 819.672131147541 estimated_peak_memory_range: - min: 32768 - max: 5687536 + min: 24576 + max: 1557800 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 11 + layers_on_npu: 22 layers_on_gpu: 0 layers_on_cpu: 3 - total_layers: 14 - job_id: j2p0r9wnp + total_layers: 25 + job_id: jvgd7zdzg job_status: Passed torchscript_onnx_qnn: - inference_time: 774.0 - throughput: 1291.9896640826873 + inference_time: 1050.0 + throughput: 952.3809523809524 estimated_peak_memory_range: - min: 73728 - max: 15287688 + min: 65536 + max: 4040712 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 14 + layers_on_npu: 26 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 14 - job_id: jw561mkyp + total_layers: 26 + job_id: jegnr7zq5 job_status: Passed torchscript_onnx_ort: - inference_time: 1203.0 - throughput: 831.255195344971 + inference_time: 1055.0 + throughput: 947.8672985781991 estimated_peak_memory_range: - min: 2109440 - max: 12179360 + min: 12288 + max: 4410832 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 19 + layers_on_npu: 29 
layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 19 - job_id: jlpevxjv5 + total_layers: 29 + job_id: j2p0evmn5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,51 +93,51 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.913920Z' + timestamp: '2024-06-11T11:59:10Z' - torchscript_onnx_tflite: - inference_time: 1063.0 - throughput: 940.7337723424271 + inference_time: 1043.0 + throughput: 958.7727708533077 estimated_peak_memory_range: - min: 12288 - max: 21684464 + min: 16384 + max: 23270336 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 11 + layers_on_npu: 22 layers_on_gpu: 0 layers_on_cpu: 3 - total_layers: 14 - job_id: j1p87rno5 + total_layers: 25 + job_id: jz57v7e95 job_status: Passed torchscript_onnx_qnn: - inference_time: 539.0 - throughput: 1855.287569573284 + inference_time: 754.0 + throughput: 1326.2599469496022 estimated_peak_memory_range: - min: 61440 - max: 17221232 + min: 0 + max: 21775952 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 14 + layers_on_npu: 26 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 14 - job_id: j1p3m7yng + total_layers: 26 + job_id: jopr1nl7g job_status: Passed torchscript_onnx_ort: - inference_time: 847.0 - throughput: 1180.637544273908 + inference_time: 808.0 + throughput: 1237.6237623762377 estimated_peak_memory_range: - min: 212992 - max: 13365360 + min: 24576 + max: 16135216 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 19 + layers_on_npu: 29 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 19 - job_id: jygz7y1xp + total_layers: 29 + job_id: j1p8w4eop job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,36 +146,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.913960Z' + timestamp: '2024-06-11T11:59:11Z' - torchscript_onnx_tflite: - inference_time: 1331.0 - throughput: 751.3148009015778 + inference_time: 1214.0 + throughput: 823.7232289950576 estimated_peak_memory_range: min: 12288 - max: 1824872 + max: 1428272 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 11 + layers_on_npu: 22 layers_on_gpu: 0 layers_on_cpu: 3 - total_layers: 14 - job_id: jogky01np + total_layers: 25 + job_id: jqp4j9y1p job_status: Passed torchscript_onnx_qnn: - inference_time: 772.0 - throughput: 1295.3367875647668 + inference_time: 1049.0 + throughput: 953.2888465204957 estimated_peak_memory_range: - min: 0 - max: 24415048 + min: 12288 + max: 80506384 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 14 + layers_on_npu: 26 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 14 - job_id: j1pvwmjrg + total_layers: 26 + job_id: jqpyv7olp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,36 +184,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.913987Z' + timestamp: '2024-06-11T11:59:09Z' - torchscript_onnx_tflite: - inference_time: 4190.0 - throughput: 238.6634844868735 + inference_time: 3090.0 + throughput: 323.62459546925567 estimated_peak_memory_range: - min: 16384 - max: 15082128 + min: 12288 + max: 16873840 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 11 + layers_on_npu: 22 layers_on_gpu: 0 layers_on_cpu: 3 - total_layers: 14 - job_id: jn5q21no5 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1811.0 - throughput: 552.1811154058531 - estimated_peak_memory_range: - min: 
65536 - max: 17231504 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 14 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 14 - job_id: j7gjlyjep + total_layers: 25 + job_id: j0pxedll5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,21 +207,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:42.914013Z' + timestamp: '2024-06-11T11:59:04Z' - torchscript_onnx_tflite: - inference_time: 5060.0 - throughput: 197.62845849802372 + inference_time: 16778.0 + throughput: 59.60185957801883 estimated_peak_memory_range: - min: 3223552 - max: 11197400 + min: 249856 + max: 7422256 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 10 + layers_on_npu: 22 layers_on_gpu: 0 - layers_on_cpu: 4 - total_layers: 14 - job_id: j1glk8jmp + layers_on_cpu: 3 + total_layers: 25 + job_id: jo5mvd095 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,36 +230,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:42.914028Z' + timestamp: '2024-06-11T11:59:05Z' - torchscript_onnx_qnn: - inference_time: 780.0 - throughput: 1282.051282051282 + inference_time: 1198.0 + throughput: 834.7245409015025 estimated_peak_memory_range: - min: 49152 - max: 49152 + min: 57344 + max: 57344 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 14 + layers_on_npu: 26 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 14 - job_id: jwgovwjk5 + total_layers: 26 + job_id: jep23vrqg job_status: Passed torchscript_onnx_ort: - inference_time: 1138.0 - throughput: 878.7346221441124 + inference_time: 1092.0 + throughput: 915.7509157509157 estimated_peak_memory_range: - min: 9015296 - max: 9015296 + min: 5398528 + max: 5398528 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 19 + layers_on_npu: 29 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 19 - job_id: jz5w9zjmp + total_layers: 29 + job_id: jogkr92n5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.914055Z' + timestamp: '2024-06-11T11:59:12Z' diff --git a/qai_hub_models/models/sesr_m5_quantized/test.py b/qai_hub_models/models/sesr_m5_quantized/test.py index 0ed36c55..0ac444df 100644 --- a/qai_hub_models/models/sesr_m5_quantized/test.py +++ b/qai_hub_models/models/sesr_m5_quantized/test.py @@ -10,13 +10,10 @@ import torch from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models.sesr_m5_quantized.demo import IMAGE_ADDRESS +from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS +from qai_hub_models.models.sesr_m5.model import MODEL_ASSET_VERSION, MODEL_ID from qai_hub_models.models.sesr_m5_quantized.demo import main as demo_main -from qai_hub_models.models.sesr_m5_quantized.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, - SESR_M5Quantizable, -) +from qai_hub_models.models.sesr_m5_quantized.model import SESR_M5Quantizable from qai_hub_models.utils.asset_loaders import ( CachedWebModelAsset, load_image, @@ -24,7 +21,7 @@ ) from qai_hub_models.utils.testing import assert_most_close, skip_clone_repo_check -OUTPUT_IMAGE_LOCAL_PATH = "sesr_m5_quantized_demo_output.png" +OUTPUT_IMAGE_LOCAL_PATH = "sesr_m5_demo_output.png" OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( MODEL_ID, MODEL_ASSET_VERSION, OUTPUT_IMAGE_LOCAL_PATH ) diff --git 
a/qai_hub_models/models/shufflenet_v2/README.md b/qai_hub_models/models/shufflenet_v2/README.md index 97694e8a..6fcab0d3 100644 --- a/qai_hub_models/models/shufflenet_v2/README.md +++ b/qai_hub_models/models/shufflenet_v2/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Shufflenet-v2 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design](https://arxiv.org/abs/1807.11164) diff --git a/qai_hub_models/models/shufflenet_v2/evaluate.py b/qai_hub_models/models/shufflenet_v2/evaluate.py new file mode 100644 index 00000000..3952a613 --- /dev/null +++ b/qai_hub_models/models/shufflenet_v2/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.shufflenet_v2 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/shufflenet_v2/export.py b/qai_hub_models/models/shufflenet_v2/export.py index 74e70e03..a3bfc586 100644 --- a/qai_hub_models/models/shufflenet_v2/export.py +++ b/qai_hub_models/models/shufflenet_v2/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/shufflenet_v2/info.yaml b/qai_hub_models/models/shufflenet_v2/info.yaml index 9663eb73..4610476e 100644 --- a/qai_hub_models/models/shufflenet_v2/info.yaml +++ b/qai_hub_models/models/shufflenet_v2/info.yaml @@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License dataset: - 
imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/shufflenet_v2/perf.yaml b/qai_hub_models/models/shufflenet_v2/perf.yaml index 2185322d..0d002af4 100644 --- a/qai_hub_models/models/shufflenet_v2/perf.yaml +++ b/qai_hub_models/models/shufflenet_v2/perf.yaml @@ -36,11 +36,11 @@ models: - name: Shufflenet-v2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1228.0 - throughput: 814.3322475570033 + inference_time: 1229.0 + throughput: 813.6696501220505 estimated_peak_memory_range: - min: 24576 - max: 2182672 + min: 49152 + max: 1892400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jnp181r7g + job_id: jqpyvdx0p job_status: Passed torchscript_onnx_qnn: - inference_time: 779.0 - throughput: 1283.6970474967907 + inference_time: 767.0 + throughput: 1303.7809647979138 estimated_peak_memory_range: - min: 507904 - max: 18379728 + min: 12288 + max: 127973560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jqp4w4z1g + job_id: jogkry2v5 job_status: Passed torchscript_onnx_ort: - inference_time: 1097.0 - throughput: 911.5770282588878 + inference_time: 1093.0 + throughput: 914.9130832570905 estimated_peak_memory_range: min: 0 - max: 10126672 + max: 4739736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: joprydz7g + job_id: j1p3qmzm5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.957399Z' + timestamp: '2024-06-08T23:14:43Z' - torchscript_onnx_tflite: - inference_time: 815.0 - throughput: 1226.993865030675 + inference_time: 816.0 + throughput: 1225.4901960784314 estimated_peak_memory_range: - min: 16384 - max: 33495808 + min: 12288 + max: 34358736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jvgdv4jzg + job_id: j2p0erm05 job_status: Passed torchscript_onnx_qnn: - inference_time: 517.0 - throughput: 1934.2359767891683 + inference_time: 519.0 + throughput: 1926.7822736030828 estimated_peak_memory_range: min: 12288 - max: 54005488 + max: 59916624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: j0px1rwlg + job_id: jn5q92lep job_status: Passed torchscript_onnx_ort: - inference_time: 754.0 - throughput: 1326.2599469496022 + inference_time: 762.0 + throughput: 1312.3359580052493 estimated_peak_memory_range: min: 12288 - max: 23340336 + max: 24863536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jep2md2q5 + job_id: jwgoevl1p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.957489Z' + timestamp: '2024-06-08T23:14:44Z' - torchscript_onnx_tflite: - inference_time: 1229.0 - throughput: 813.6696501220505 + inference_time: 1223.0 + throughput: 817.6614881439084 estimated_peak_memory_range: min: 28672 - max: 1971648 + max: 1440712 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jz57dnq95 + job_id: j1p8w7eqp job_status: Passed torchscript_onnx_qnn: - inference_time: 768.0 - throughput: 1302.0833333333333 + inference_time: 769.0 + throughput: 1300.3901170351105 estimated_peak_memory_range: - min: 618496 - max: 17970352 + min: 622592 + max: 138856072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jegneqjqg + job_id: jw56q18ng job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.957545Z' + timestamp: '2024-06-08T23:14:42Z' - torchscript_onnx_qnn: - inference_time: 950.0 - throughput: 1052.6315789473683 + inference_time: 1095.0 + throughput: 913.2420091324201 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jo5mzkj9p + job_id: j1gleky2p job_status: Passed torchscript_onnx_ort: - inference_time: 1131.0 - throughput: 884.1732979664014 + inference_time: 1103.0 + throughput: 906.6183136899365 estimated_peak_memory_range: - min: 8019968 - max: 8019968 + min: 5971968 + max: 5971968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jqpyd29lp + job_id: j1pvzwlzg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.957606Z' + timestamp: '2024-06-08T23:14:45Z' diff --git a/qai_hub_models/models/shufflenet_v2_quantized/README.md b/qai_hub_models/models/shufflenet_v2_quantized/README.md index 32b3c576..f2663608 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/README.md +++ b/qai_hub_models/models/shufflenet_v2_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Shufflenet-v2Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design](https://arxiv.org/abs/1807.11164) diff --git a/qai_hub_models/models/shufflenet_v2_quantized/evaluate.py b/qai_hub_models/models/shufflenet_v2_quantized/evaluate.py new file mode 100644 index 00000000..2fb7d9af --- /dev/null +++ b/qai_hub_models/models/shufflenet_v2_quantized/evaluate.py @@ -0,0 +1,63 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.shufflenet_v2_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + supports_ort=False, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/shufflenet_v2_quantized/export.py b/qai_hub_models/models/shufflenet_v2_quantized/export.py index 4692513f..437ca9e5 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/export.py +++ b/qai_hub_models/models/shufflenet_v2_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/shufflenet_v2_quantized/info.yaml b/qai_hub_models/models/shufflenet_v2_quantized/info.yaml index afd1d7e6..25dde27d 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/info.yaml +++ b/qai_hub_models/models/shufflenet_v2_quantized/info.yaml @@ -41,3 +41,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/shufflenet_v2_quantized/model.py b/qai_hub_models/models/shufflenet_v2_quantized/model.py index afc7535d..a3d7540d 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/model.py +++ b/qai_hub_models/models/shufflenet_v2_quantized/model.py @@ -26,7 +26,7 @@ from qai_hub_models.utils.asset_loaders import CachedWebModelAsset from qai_hub_models.utils.quantization_aimet import ( convert_all_depthwise_to_per_tensor, - tie_aimet_observer_groups, + tie_observers, ) MODEL_ID = __name__.split(".")[-2] @@ -82,7 +82,7 @@ def from_pretrained( dummy_input=dummy_input, ) convert_all_depthwise_to_per_tensor(sim.model) - cls._tie_pre_concat_quantizers(sim) + tie_observers(sim) constrain_quantized_inputs_to_image_range(sim) if aimet_encodings: @@ -94,57 +94,3 @@ def from_pretrained( sim.model.eval() return cls(sim) - - @classmethod - def _tie_pre_concat_quantizers(cls, sim: QuantizationSimModel): - """ - This ties together the output 
quantizers prior to concatenations. This - prevents unnecessary re-quantization during the concatenation. - """ - n = sim.model.net - # Because of skip connections, the groups are large - groups = [ - [ - getattr(getattr(n.stage2, "0").branch1, "4"), - getattr(getattr(n.stage2, "0").branch2, "7"), - getattr(n.stage2, "0").module_cat, - getattr(getattr(n.stage2, "1").branch2, "7"), - getattr(n.stage2, "1").module_cat_1, - getattr(getattr(n.stage2, "2").branch2, "7"), - getattr(n.stage2, "2").module_cat_2, - getattr(getattr(n.stage2, "3").branch2, "7"), - getattr(n.stage2, "3").module_cat_3, - ], - [ - getattr(getattr(n.stage3, "0").branch1, "4"), - getattr(getattr(n.stage3, "0").branch2, "7"), - getattr(n.stage3, "0").module_cat_4, - getattr(getattr(n.stage3, "1").branch2, "7"), - getattr(n.stage3, "1").module_cat_5, - getattr(getattr(n.stage3, "2").branch2, "7"), - getattr(n.stage3, "2").module_cat_6, - getattr(getattr(n.stage3, "3").branch2, "7"), - getattr(n.stage3, "3").module_cat_7, - getattr(getattr(n.stage3, "4").branch2, "7"), - getattr(n.stage3, "4").module_cat_8, - getattr(getattr(n.stage3, "5").branch2, "7"), - getattr(n.stage3, "5").module_cat_9, - getattr(getattr(n.stage3, "6").branch2, "7"), - getattr(n.stage3, "6").module_cat_10, - getattr(getattr(n.stage3, "7").branch2, "7"), - getattr(n.stage3, "7").module_cat_11, - ], - [ - getattr(getattr(n.stage4, "0").branch1, "4"), - getattr(getattr(n.stage4, "0").branch2, "7"), - getattr(n.stage4, "0").module_cat_12, - getattr(getattr(n.stage4, "1").branch2, "7"), - getattr(n.stage4, "1").module_cat_13, - getattr(getattr(n.stage4, "2").branch2, "7"), - getattr(n.stage4, "2").module_cat_14, - getattr(getattr(n.stage4, "3").branch2, "7"), - getattr(n.stage4, "3").module_cat_15, - ], - ] - - tie_aimet_observer_groups(groups) diff --git a/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml b/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml index 0b0a30d7..342366ce 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml +++ b/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: Shufflenet-v2Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 635.0 - throughput: 1574.8031496062993 + inference_time: 629.0 + throughput: 1589.825119236884 estimated_peak_memory_range: - min: 12288 - max: 1986880 + min: 16384 + max: 1932240 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: j1p87rlo5 + job_id: jlpe4v785 job_status: Passed torchscript_onnx_qnn: - inference_time: 588.0 - throughput: 1700.6802721088436 + inference_time: 582.0 + throughput: 1718.213058419244 estimated_peak_memory_range: - min: 184320 - max: 3281704 + min: 16384 + max: 102592048 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,7 +69,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: j1p3m7kng + job_id: jvgd7vd6g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -78,13 +78,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.991988Z' + timestamp: '2024-06-08T23:15:22Z' - torchscript_onnx_tflite: - inference_time: 455.0 - throughput: 2197.802197802198 + inference_time: 459.0 + throughput: 2178.649237472767 estimated_peak_memory_range: min: 12288 - max: 22965888 + max: 23307232 primary_compute_unit: NPU precision: int8 layer_info: @@ -92,14 +92,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 205 - job_id: jogky0jnp + job_id: jygzv7l4p job_status: Passed torchscript_onnx_qnn: - inference_time: 420.0 - throughput: 2380.9523809523807 + inference_time: 418.0 + throughput: 2392.3444976076553 estimated_peak_memory_range: - min: 0 - max: 46946848 + min: 163840 + max: 50012432 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,7 +107,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jwgovwyk5 + job_id: jz5wm9lzg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -116,13 +116,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.992051Z' + timestamp: '2024-06-08T23:15:23Z' - torchscript_onnx_tflite: - inference_time: 623.0 - throughput: 1605.1364365971108 + inference_time: 649.0 + throughput: 1540.8320493066255 estimated_peak_memory_range: - min: 16384 - max: 1963320 + min: 12288 + max: 1631760 primary_compute_unit: NPU precision: int8 layer_info: @@ -130,14 +130,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: jn5q21jo5 + job_id: jz5wm9l4g job_status: Passed torchscript_onnx_qnn: - inference_time: 586.0 - throughput: 1706.4846416382252 + inference_time: 585.0 + throughput: 1709.4017094017095 estimated_peak_memory_range: - min: 167936 - max: 23791000 + min: 163840 + max: 77147648 primary_compute_unit: NPU precision: int8 layer_info: @@ -145,7 +145,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: j7gjlyxep + job_id: jnp1q8nkg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -154,13 +154,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.992107Z' + timestamp: '2024-06-08T23:15:26Z' - torchscript_onnx_tflite: - inference_time: 972.0 - throughput: 1028.80658436214 + inference_time: 944.0 + throughput: 1059.322033898305 estimated_peak_memory_range: min: 12288 - max: 16548544 + max: 17202032 primary_compute_unit: NPU precision: int8 layer_info: @@ -168,22 +168,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: j1glk8nmp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1101.0 - throughput: 908.2652134423251 - estimated_peak_memory_range: - min: 163840 - max: 43822880 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 122 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 122 - job_id: jlpevx9v5 + job_id: jmg994zmg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -192,13 +177,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:42.992163Z' + timestamp: '2024-06-08T23:15:20Z' - torchscript_onnx_tflite: - inference_time: 10657.0 - throughput: 93.83503800319039 + inference_time: 8510.0 + throughput: 117.50881316098707 estimated_peak_memory_range: - min: 147456 - max: 5412728 + min: 65536 + max: 5500048 primary_compute_unit: CPU precision: fp32 layer_info: @@ -206,7 +191,7 @@ models: layers_on_gpu: 9 layers_on_cpu: 153 total_layers: 205 - job_id: jw561m6yp + job_id: jnp1q8nng job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -215,13 +200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:42.992198Z' + timestamp: '2024-06-08T23:15:21Z' - torchscript_onnx_qnn: - inference_time: 709.0 - throughput: 1410.4372355430182 + inference_time: 694.0 + throughput: 1440.922190201729 estimated_peak_memory_range: - min: 589824 - max: 589824 + min: 618496 + max: 
618496 primary_compute_unit: NPU precision: int8 layer_info: @@ -229,7 +214,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: j1pvwm3rg + job_id: jmg994zqg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -238,4 +223,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.992226Z' + timestamp: '2024-06-08T23:15:24Z' diff --git a/qai_hub_models/models/sinet/README.md b/qai_hub_models/models/sinet/README.md index 601b6d46..82d1c945 100644 --- a/qai_hub_models/models/sinet/README.md +++ b/qai_hub_models/models/sinet/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of SINet can be found [here](https://github.com/clovaai/ext_portrait_segmentation/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [SINet: Extreme Lightweight Portrait Segmentation Networks with Spatial Squeeze Modules and Information Blocking Decoder](https://arxiv.org/abs/1911.09099) diff --git a/qai_hub_models/models/sinet/export.py b/qai_hub_models/models/sinet/export.py index dcfe796d..495dde9f 100644 --- a/qai_hub_models/models/sinet/export.py +++ b/qai_hub_models/models/sinet/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/sinet/perf.yaml b/qai_hub_models/models/sinet/perf.yaml index 8a9f923a..79d10a22 100644 --- a/qai_hub_models/models/sinet/perf.yaml +++ b/qai_hub_models/models/sinet/perf.yaml @@ -36,11 +36,11 @@ models: - name: SINet performance_metrics: - torchscript_onnx_tflite: - inference_time: 1838.0 - throughput: 544.069640914037 + inference_time: 1808.0 + throughput: 553.0973451327434 estimated_peak_memory_range: min: 16384 - max: 2227656 + max: 1874832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jvgdv4wzg + job_id: jo5mvz0y5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1175.0 - throughput: 851.063829787234 + inference_time: 1170.0 + throughput: 854.7008547008547 estimated_peak_memory_range: min: 16384 - max: 6837024 + max: 4437520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: j0px1rvlg + job_id: jep23moxg job_status: Passed torchscript_onnx_ort: - inference_time: 2272.0 - throughput: 440.14084507042253 + inference_time: 2242.0 + throughput: 446.03033006244425 estimated_peak_memory_range: - min: 2109440 - max: 8476840 + min: 233472 + max: 61135024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jep2md8q5 + job_id: jogkry6y5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 
Gen 2 - timestamp: '2024-05-29T18:59:43.027778Z' + timestamp: '2024-06-08T23:15:55Z' - torchscript_onnx_tflite: - inference_time: 1189.0 - throughput: 841.0428931875525 + inference_time: 1188.0 + throughput: 841.7508417508418 estimated_peak_memory_range: min: 12288 - max: 26086320 + max: 27213536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jz57dnz95 + job_id: jegnre1v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 783.0 - throughput: 1277.139208173691 + inference_time: 784.0 + throughput: 1275.5102040816328 estimated_peak_memory_range: - min: 12288 - max: 68156960 + min: 0 + max: 67399104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jo5mzkr9p + job_id: jqpyvd8rp job_status: Passed torchscript_onnx_ort: - inference_time: 1540.0 - throughput: 649.3506493506494 + inference_time: 1564.0 + throughput: 639.386189258312 estimated_peak_memory_range: - min: 544768 - max: 27802000 + min: 12288 + max: 25637744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jqpyd2elp + job_id: jn5q9247p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.027880Z' + timestamp: '2024-06-08T23:15:55Z' - torchscript_onnx_tflite: - inference_time: 1802.0 - throughput: 554.9389567147614 + inference_time: 1809.0 + throughput: 552.791597567717 estimated_peak_memory_range: - min: 28672 - max: 1897304 + min: 12288 + max: 1931632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jqp4w4q1g + job_id: jopr1yxvg job_status: Passed torchscript_onnx_qnn: - inference_time: 1174.0 - throughput: 851.7887563884157 + inference_time: 1183.0 + throughput: 845.30853761623 estimated_peak_memory_range: - min: 16384 - max: 19845704 + min: 622592 + max: 10230296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: joprydk7g + job_id: j1p8w7jzp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.027978Z' + timestamp: '2024-06-08T23:15:54Z' - torchscript_onnx_qnn: - inference_time: 1410.0 - throughput: 709.2198581560284 + inference_time: 1353.0 + throughput: 739.0983000739099 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jegneq2qg + job_id: j2p0ero25 job_status: Passed torchscript_onnx_ort: - inference_time: 2312.0 - throughput: 432.52595155709344 + inference_time: 2343.0 + throughput: 426.8032437046522 estimated_peak_memory_range: - min: 3145728 - max: 3145728 + min: 6090752 + max: 6090752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: j2p0r9ynp + job_id: j1glekwep job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.028046Z' + timestamp: 
'2024-06-08T23:15:56Z' diff --git a/qai_hub_models/models/squeezenet1_1/README.md b/qai_hub_models/models/squeezenet1_1/README.md index 879ef789..48b5f7ed 100644 --- a/qai_hub_models/models/squeezenet1_1/README.md +++ b/qai_hub_models/models/squeezenet1_1/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of SqueezeNet-1_1 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size](https://arxiv.org/abs/1602.07360) diff --git a/qai_hub_models/models/squeezenet1_1/evaluate.py b/qai_hub_models/models/squeezenet1_1/evaluate.py new file mode 100644 index 00000000..eb509750 --- /dev/null +++ b/qai_hub_models/models/squeezenet1_1/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.squeezenet1_1 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/squeezenet1_1/export.py b/qai_hub_models/models/squeezenet1_1/export.py index 5185e0bc..361dd2cc 100644 --- a/qai_hub_models/models/squeezenet1_1/export.py +++ b/qai_hub_models/models/squeezenet1_1/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/squeezenet1_1/info.yaml b/qai_hub_models/models/squeezenet1_1/info.yaml index dba5e172..9cb093e0 100644 --- a/qai_hub_models/models/squeezenet1_1/info.yaml +++ b/qai_hub_models/models/squeezenet1_1/info.yaml @@ -40,3 +40,4 @@ 
deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/squeezenet1_1/perf.yaml b/qai_hub_models/models/squeezenet1_1/perf.yaml index 438ef1ef..e1645dcb 100644 --- a/qai_hub_models/models/squeezenet1_1/perf.yaml +++ b/qai_hub_models/models/squeezenet1_1/perf.yaml @@ -36,11 +36,11 @@ models: - name: SqueezeNet-1_1 performance_metrics: - torchscript_onnx_tflite: - inference_time: 660.0 - throughput: 1515.1515151515152 + inference_time: 646.0 + throughput: 1547.9876160990711 estimated_peak_memory_range: - min: 28672 - max: 5545600 + min: 20480 + max: 1582896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jogky0znp + job_id: j1p3qmox5 job_status: Passed torchscript_onnx_qnn: - inference_time: 705.0 - throughput: 1418.4397163120568 + inference_time: 702.0 + throughput: 1424.5014245014245 estimated_peak_memory_range: min: 16384 - max: 6892784 + max: 7170920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jw561mjyp + job_id: j7gjkl375 job_status: Passed torchscript_onnx_ort: - inference_time: 666.0 - throughput: 1501.5015015015015 + inference_time: 671.0 + throughput: 1490.312965722802 estimated_peak_memory_range: min: 12288 - max: 7528472 + max: 11919448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: j7gjlymep + job_id: jmg994oqg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.062080Z' + timestamp: '2024-06-08T23:16:21Z' - torchscript_onnx_tflite: - inference_time: 462.0 - throughput: 2164.5021645021643 + inference_time: 452.0 + throughput: 2212.3893805309735 estimated_peak_memory_range: - min: 24576 - max: 22715072 + min: 18399232 + max: 41710416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jn5q218o5 + job_id: jwgoevd4p job_status: Passed torchscript_onnx_qnn: - inference_time: 491.0 - throughput: 2036.6598778004072 + inference_time: 492.0 + throughput: 2032.520325203252 estimated_peak_memory_range: - min: 618496 - max: 31784512 + min: 0 + max: 32687824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: j1p3m73ng + job_id: jlpe4v675 job_status: Passed torchscript_onnx_ort: - inference_time: 485.0 - throughput: 2061.855670103093 + inference_time: 477.0 + throughput: 2096.4360587002097 estimated_peak_memory_range: min: 12288 - max: 17557776 + max: 19637184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jlpevx1v5 + job_id: jnp1q8okg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.062134Z' + timestamp: '2024-06-08T23:16:22Z' - torchscript_onnx_tflite: - inference_time: 655.0 - throughput: 1526.7175572519084 + inference_time: 664.0 + throughput: 1506.0240963855422 estimated_peak_memory_range: - min: 24576 - max: 1501432 + min: 20480 + max: 1398696 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: j1glk8zmp + job_id: j1pvzw27g job_status: Passed torchscript_onnx_qnn: - inference_time: 705.0 - throughput: 1418.4397163120568 + inference_time: 698.0 + throughput: 1432.6647564469913 estimated_peak_memory_range: - min: 622592 - max: 6571960 + min: 634880 + max: 7259784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: j1pvwmorg + job_id: jz5wm9yzg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.062167Z' + timestamp: '2024-06-08T23:16:20Z' - torchscript_onnx_qnn: - inference_time: 825.0 - throughput: 1212.121212121212 + inference_time: 801.0 + throughput: 1248.4394506866417 estimated_peak_memory_range: - min: 602112 - max: 602112 + min: 606208 + max: 606208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jwgovw0k5 + job_id: jygzv7zzp job_status: Passed torchscript_onnx_ort: - inference_time: 670.0 - throughput: 1492.5373134328358 + inference_time: 681.0 + throughput: 1468.4287812041116 estimated_peak_memory_range: - min: 2846720 - max: 2846720 + min: 3670016 + max: 3670016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jygz7y9xp + job_id: jvgd7v6kg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.062205Z' + timestamp: '2024-06-08T23:16:23Z' diff --git a/qai_hub_models/models/squeezenet1_1_quantized/README.md b/qai_hub_models/models/squeezenet1_1_quantized/README.md index 77ea9cee..e7e60338 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/README.md +++ b/qai_hub_models/models/squeezenet1_1_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of SqueezeNet-1_1Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size](https://arxiv.org/abs/1602.07360) diff --git a/qai_hub_models/models/squeezenet1_1_quantized/evaluate.py b/qai_hub_models/models/squeezenet1_1_quantized/evaluate.py new file mode 100644 index 00000000..bdaf6536 --- /dev/null +++ b/qai_hub_models/models/squeezenet1_1_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+
+
+from __future__ import annotations
+
+import warnings
+
+import qai_hub as hub
+
+from qai_hub_models.models.squeezenet1_1_quantized import MODEL_ID, Model
+from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs
+from qai_hub_models.utils.base_model import BaseModel
+from qai_hub_models.utils.evaluate import evaluate_on_dataset
+from qai_hub_models.utils.inference import compile_model_from_args
+from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin
+
+SUPPORTED_DATASETS = ["imagenette", "imagenet"]
+
+
+def main():
+    warnings.filterwarnings("ignore")
+    parser = evaluate_parser(
+        model_cls=Model,
+        default_split_size=2500,
+        supported_datasets=SUPPORTED_DATASETS,
+    )
+    args = parser.parse_args()
+    args.device = None
+
+    if args.hub_model_id is not None:
+        hub_model = hub.get_model(args.hub_model_id)
+    else:
+        hub_model = compile_model_from_args(
+            MODEL_ID, args, get_model_kwargs(Model, vars(args))
+        )
+    hub_device = get_hub_device(None, args.chipset)
+
+    # Use Fp16 model for torch inference
+    for cls in Model.__mro__:
+        if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin):
+            torch_cls = cls
+            break
+    torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args)))
+    evaluate_on_dataset(
+        hub_model,
+        torch_model,
+        hub_device,
+        args.dataset_name,
+        args.split_size,
+        args.num_samples,
+        args.seed,
+        args.profile_options,
+        args.use_cache,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/qai_hub_models/models/squeezenet1_1_quantized/export.py b/qai_hub_models/models/squeezenet1_1_quantized/export.py
index ba6e7087..f68f076e 100644
--- a/qai_hub_models/models/squeezenet1_1_quantized/export.py
+++ b/qai_hub_models/models/squeezenet1_1_quantized/export.py
@@ -196,7 +196,7 @@ def export_model(
         target_runtime_extension = "so"
     elif target_runtime == TargetRuntime.TFLITE:
         target_runtime_extension = "tflite"
-    elif target_runtime == TargetRuntime.ORT:
+    elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}:
         target_runtime_extension = "onnx"

     os.makedirs(output_path, exist_ok=True)
diff --git a/qai_hub_models/models/squeezenet1_1_quantized/info.yaml b/qai_hub_models/models/squeezenet1_1_quantized/info.yaml
index 8daf5c4d..6675eada 100644
--- a/qai_hub_models/models/squeezenet1_1_quantized/info.yaml
+++ b/qai_hub_models/models/squeezenet1_1_quantized/info.yaml
@@ -41,3 +41,4 @@ deploy_license_type: AI Model Hub License
 dataset:
 - imagenet-1k
 - imagenet-22k
+labels_file: imagenet_labels.txt
diff --git a/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml b/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml
index 19b393c3..1d3b45b2 100644
--- a/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml
+++ b/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml
@@ -42,11 +42,11 @@ models:
 - name: SqueezeNet-1_1Quantized
   performance_metrics:
   - torchscript_onnx_tflite:
-      inference_time: 218.0
-      throughput: 4587.155963302752
+      inference_time: 220.0
+      throughput: 4545.454545454545
       estimated_peak_memory_range:
-        min: 12288
-        max: 3523192
+        min: 16384
+        max: 1715824
       primary_compute_unit: NPU
       precision: int8
       layer_info:
@@ -54,14 +54,14 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 41
-      job_id: jmg942185
+      job_id: jqp4jweqp
       job_status: Passed
     torchscript_onnx_qnn:
-      inference_time: 470.0
-      throughput: 2127.659574468085
+      inference_time: 467.0
+      throughput: 2141.3276231263385
       estimated_peak_memory_range:
-        min: 20480
-        max: 9844736
+        min: 167936
+        max: 10118072
primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 45 - job_id: jnp181lng + job_id: jep23mdxg job_status: Passed torchscript_onnx_ort: - inference_time: 563.0 - throughput: 1776.1989342806394 + inference_time: 450.0 + throughput: 2222.222222222222 estimated_peak_memory_range: - min: 618496 - max: 7541392 + min: 12288 + max: 5507096 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 49 + layers_on_npu: 47 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 49 - job_id: jo5mzk27p + total_layers: 47 + job_id: jogkry0y5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.096271Z' + timestamp: '2024-06-08T23:16:52Z' - torchscript_onnx_tflite: - inference_time: 180.0 - throughput: 5555.555555555556 + inference_time: 179.0 + throughput: 5586.592178770949 estimated_peak_memory_range: min: 12288 - max: 22121184 + max: 22450960 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jnp181l7g + job_id: j0pxe10j5 job_status: Passed torchscript_onnx_qnn: inference_time: 342.0 throughput: 2923.9766081871344 estimated_peak_memory_range: - min: 163840 - max: 28352000 + min: 12288 + max: 27530432 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 45 - job_id: jvgdv496g + job_id: jqpyvd2rp job_status: Passed torchscript_onnx_ort: - inference_time: 444.0 - throughput: 2252.252252252252 + inference_time: 372.0 + throughput: 2688.1720430107525 estimated_peak_memory_range: min: 12288 - max: 14450080 + max: 15334176 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 49 + layers_on_npu: 47 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 49 - job_id: jegneqyjg + total_layers: 47 + job_id: jn5q9217p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.096323Z' + timestamp: '2024-06-08T23:16:53Z' - torchscript_onnx_tflite: - inference_time: 221.0 - throughput: 4524.886877828054 + inference_time: 223.0 + throughput: 4484.304932735426 estimated_peak_memory_range: - min: 12288 - max: 2010568 + min: 20480 + max: 1471296 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jvgdv49zg + job_id: jo5mvz9y5 job_status: Passed torchscript_onnx_qnn: - inference_time: 461.0 - throughput: 2169.1973969631235 + inference_time: 464.0 + throughput: 2155.1724137931033 estimated_peak_memory_range: - min: 172032 - max: 42070744 + min: 28672 + max: 17992504 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 45 - job_id: jqp4w4o2g + job_id: j1p8w7rzp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.096354Z' + timestamp: '2024-06-08T23:16:51Z' - torchscript_onnx_tflite: - inference_time: 533.0 - throughput: 1876.172607879925 + inference_time: 526.0 + throughput: 1901.1406844106464 estimated_peak_memory_range: min: 12288 - max: 15187344 + max: 14752288 
primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jz5w9zv4p - job_status: Passed - torchscript_onnx_qnn: - inference_time: 961.0 - throughput: 1040.5827263267429 - estimated_peak_memory_range: - min: 12288 - max: 23287696 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 45 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 45 - job_id: j0px1rj8g + job_id: jegnreqv5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:43.096386Z' + timestamp: '2024-06-08T23:16:46Z' - torchscript_onnx_tflite: - inference_time: 4102.0 - throughput: 243.78352023403218 + inference_time: 4092.0 + throughput: 244.37927663734115 estimated_peak_memory_range: - min: 16384 - max: 7045232 + min: 20480 + max: 7234128 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jmg9421m5 + job_id: jopr1ydvg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:43.096403Z' + timestamp: '2024-06-08T23:16:47Z' - torchscript_onnx_qnn: - inference_time: 562.0 - throughput: 1779.3594306049822 + inference_time: 536.0 + throughput: 1865.6716417910447 estimated_peak_memory_range: - min: 598016 - max: 598016 + min: 1843200 + max: 1843200 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 45 - job_id: jz57dnwn5 + job_id: j2p0er925 job_status: Passed torchscript_onnx_ort: - inference_time: 585.0 - throughput: 1709.4017094017095 + inference_time: 472.0 + throughput: 2118.64406779661 estimated_peak_memory_range: - min: 2117632 - max: 2117632 + min: 2641920 + max: 2641920 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 49 + layers_on_npu: 47 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 49 - job_id: joprydqkg + total_layers: 47 + job_id: j1glek8ep job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.096437Z' + timestamp: '2024-06-08T23:16:54Z' diff --git a/qai_hub_models/models/stable_diffusion_v1_5_quantized/README.md b/qai_hub_models/models/stable_diffusion_v1_5_quantized/README.md index e7447ff1..286aab8d 100644 --- a/qai_hub_models/models/stable_diffusion_v1_5_quantized/README.md +++ b/qai_hub_models/models/stable_diffusion_v1_5_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Stable-Diffusion-v1.5 can be found [here](https://github.com/CompVis/stable-diffusion/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url})
+- The license for the compiled assets for on-device deployment can be found [here](https://github.com/CompVis/stable-diffusion/blob/main/LICENSE)

 ## References
 * [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752)
diff --git a/qai_hub_models/models/stable_diffusion_v2_1_quantized/README.md b/qai_hub_models/models/stable_diffusion_v2_1_quantized/README.md
index ade8bee8..69677b00 100644
--- a/qai_hub_models/models/stable_diffusion_v2_1_quantized/README.md
+++ b/qai_hub_models/models/stable_diffusion_v2_1_quantized/README.md
@@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub.

 ## License
 - The license for the original implementation of Stable-Diffusion-v2.1 can be found [here](https://github.com/CompVis/stable-diffusion/blob/main/LICENSE).
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url})
+- The license for the compiled assets for on-device deployment can be found [here](https://github.com/CompVis/stable-diffusion/blob/main/LICENSE)

 ## References
 * [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752)
diff --git a/qai_hub_models/models/stylegan2/README.md b/qai_hub_models/models/stylegan2/README.md
index ea9e6792..6ff284c8 100644
--- a/qai_hub_models/models/stylegan2/README.md
+++ b/qai_hub_models/models/stylegan2/README.md
@@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub.

 ## License
 - The license for the original implementation of StyleGAN2 can be found [here](https://github.com/NVlabs/stylegan3/blob/main/LICENSE.txt).
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url})
+- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf)

 ## References
 * [Analyzing and Improving the Image Quality of StyleGAN](http://arxiv.org/abs/1912.04958)
diff --git a/qai_hub_models/models/stylegan2/export.py b/qai_hub_models/models/stylegan2/export.py
index 4a1053bc..5a712592 100644
--- a/qai_hub_models/models/stylegan2/export.py
+++ b/qai_hub_models/models/stylegan2/export.py
@@ -179,7 +179,7 @@ def export_model(
         target_runtime_extension = "so"
     elif target_runtime == TargetRuntime.TFLITE:
         target_runtime_extension = "tflite"
-    elif target_runtime == TargetRuntime.ORT:
+    elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}:
         target_runtime_extension = "onnx"

     os.makedirs(output_path, exist_ok=True)
@@ -213,7 +213,12 @@ def export_model(

 def main():
     warnings.filterwarnings("ignore")
-    parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False)
+    parser = export_parser(
+        model_cls=Model,
+        supports_qnn=False,
+        supports_ort=False,
+        supports_precompiled_ort=False,
+    )
     args = parser.parse_args()
     export_model(**vars(args))

diff --git a/qai_hub_models/models/stylegan2/perf.yaml b/qai_hub_models/models/stylegan2/perf.yaml
index b308729f..938965f9 100644
--- a/qai_hub_models/models/stylegan2/perf.yaml
+++ b/qai_hub_models/models/stylegan2/perf.yaml
@@ -36,11 +36,11 @@ models:
 - name: StyleGAN2
   performance_metrics:
   - torchscript_onnx_tflite:
-      inference_time: 1684726.0
-      throughput: 0.5935683309926956
+      inference_time: 1649413.0
+      throughput: 0.6062762934450013
estimated_peak_memory_range: - min: 1399386112 - max: 1408039328 + min: 1397805056 + max: 2230233016 primary_compute_unit: CPU precision: fp32 layer_info: @@ -48,7 +48,7 @@ models: layers_on_gpu: 78 layers_on_cpu: 402 total_layers: 480 - job_id: jqpyd2w0p + job_id: j1p3qm7x5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -57,13 +57,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.139435Z' + timestamp: '2024-06-08T23:17:52Z' - torchscript_onnx_tflite: - inference_time: 1246952.0 - throughput: 0.8019554882625795 + inference_time: 1311471.0 + throughput: 0.7625025639148711 estimated_peak_memory_range: - min: 1057603584 - max: 1090183616 + min: 1184645120 + max: 1218773040 primary_compute_unit: CPU precision: fp32 layer_info: @@ -71,7 +71,7 @@ models: layers_on_gpu: 78 layers_on_cpu: 402 total_layers: 480 - job_id: j2p0r970p + job_id: jwgoevw4p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -80,13 +80,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.139505Z' + timestamp: '2024-06-08T23:17:53Z' - torchscript_onnx_tflite: - inference_time: 1690139.0 - throughput: 0.5916673125701496 + inference_time: 1578379.0 + throughput: 0.6335613943165742 estimated_peak_memory_range: - min: 826093568 - max: 2179375456 + min: 1049174016 + max: 1057203192 primary_compute_unit: CPU precision: fp32 layer_info: @@ -94,7 +94,7 @@ models: layers_on_gpu: 78 layers_on_cpu: 402 total_layers: 480 - job_id: j1p87rvq5 + job_id: j1pvzwm7g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -103,12 +103,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.139572Z' - - reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows - manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.139580Z' + timestamp: '2024-06-08T23:17:54Z' diff --git a/qai_hub_models/models/swin_base/README.md b/qai_hub_models/models/swin_base/README.md index e1b53caa..ec878b5d 100644 --- a/qai_hub_models/models/swin_base/README.md +++ b/qai_hub_models/models/swin_base/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Swin-Base can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) diff --git a/qai_hub_models/models/swin_base/evaluate.py b/qai_hub_models/models/swin_base/evaluate.py new file mode 100644 index 00000000..052bc1fc --- /dev/null +++ b/qai_hub_models/models/swin_base/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.swin_base import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/swin_base/export.py b/qai_hub_models/models/swin_base/export.py index a2591912..79d16b96 100644 --- a/qai_hub_models/models/swin_base/export.py +++ b/qai_hub_models/models/swin_base/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/swin_base/info.yaml b/qai_hub_models/models/swin_base/info.yaml index 00a55170..04918f59 100644 --- a/qai_hub_models/models/swin_base/info.yaml +++ b/qai_hub_models/models/swin_base/info.yaml @@ -44,3 +44,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/swin_base/perf.yaml b/qai_hub_models/models/swin_base/perf.yaml index 848eeced..37ad14fa 100644 --- a/qai_hub_models/models/swin_base/perf.yaml +++ b/qai_hub_models/models/swin_base/perf.yaml @@ -36,11 +36,11 @@ models: - name: Swin-Base performance_metrics: - torchscript_onnx_tflite: - inference_time: 38343.0 - throughput: 26.080379730328875 + inference_time: 38045.0 + throughput: 26.284662899198317 estimated_peak_memory_range: - min: 57344 - max: 3970680 + min: 307200 + max: 3648376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: j7gjly41p + job_id: jz57vdnq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 31373.0 - throughput: 31.874541803461575 + inference_time: 31404.0 + throughput: 31.84307731499172 estimated_peak_memory_range: - min: 16384 - max: 47301400 + min: 57344 + max: 46336408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jz5w9zn4p + job_id: jo5mvzky5 job_status: Passed torchscript_onnx_ort: - inference_time: 64145.0 - throughput: 15.589679632083561 + inference_time: 63106.0 + throughput: 15.846353754001205 estimated_peak_memory_range: - min: 253952 - max: 473800768 + min: 278528 + max: 457269496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 
@@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1163 - job_id: jz57dn3n5 + job_id: jqpyvdyrp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.157642Z' + timestamp: '2024-06-08T23:18:38Z' - torchscript_onnx_tflite: - inference_time: 26180.0 - throughput: 38.19709702062643 + inference_time: 26266.0 + throughput: 38.07203228508338 estimated_peak_memory_range: - min: 40960 - max: 499707440 + min: 49152 + max: 501753168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jlpevx385 + job_id: jqp4jw4qp job_status: Passed torchscript_onnx_qnn: - inference_time: 22009.0 - throughput: 45.43595801717479 + inference_time: 22072.0 + throughput: 45.30627038782168 estimated_peak_memory_range: min: 0 - max: 410340528 + max: 409890496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jmg942em5 + job_id: jegnrewv5 job_status: Passed torchscript_onnx_ort: - inference_time: 44507.0 - throughput: 22.468375761116228 + inference_time: 44119.0 + throughput: 22.66597157687164 estimated_peak_memory_range: - min: 626688 - max: 205027936 + min: 643072 + max: 204011072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1163 - job_id: jqp4w402g + job_id: j2p0erx25 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.158083Z' + timestamp: '2024-06-08T23:18:39Z' - torchscript_onnx_tflite: - inference_time: 38498.0 - throughput: 25.975375344173724 + inference_time: 38074.0 + throughput: 26.264642538215053 estimated_peak_memory_range: - min: 40960 - max: 3943568 + min: 61440 + max: 4041520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jygz7yk4p + job_id: j0pxe1rj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 31489.0 - throughput: 31.757121534504112 + inference_time: 31252.0 + throughput: 31.997952131063613 estimated_peak_memory_range: - min: 49152 - max: 48603792 + min: 61440 + max: 51901248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jvgdv4l6g + job_id: jep23mzxg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.158389Z' + timestamp: '2024-06-08T23:18:37Z' - torchscript_onnx_qnn: - inference_time: 39136.0 - throughput: 25.551921504497138 + inference_time: 38623.0 + throughput: 25.89130828780778 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jnp181xng + job_id: jopr1y7vg job_status: Passed torchscript_onnx_ort: - inference_time: 66331.0 - throughput: 15.075907192715322 + inference_time: 65447.0 + throughput: 15.27953916909866 estimated_peak_memory_range: - min: 685400064 - max: 685400064 + min: 552267776 + max: 552267776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 
0 layers_on_cpu: 0 total_layers: 1163 - job_id: j0px1r28g + job_id: j1p8w7kzp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.158657Z' + timestamp: '2024-06-08T23:18:40Z' diff --git a/qai_hub_models/models/swin_small/README.md b/qai_hub_models/models/swin_small/README.md index 01c8a31a..a661caf5 100644 --- a/qai_hub_models/models/swin_small/README.md +++ b/qai_hub_models/models/swin_small/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Swin-Small can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) diff --git a/qai_hub_models/models/swin_small/evaluate.py b/qai_hub_models/models/swin_small/evaluate.py new file mode 100644 index 00000000..8f1f1388 --- /dev/null +++ b/qai_hub_models/models/swin_small/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.swin_small import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/swin_small/export.py b/qai_hub_models/models/swin_small/export.py index 82947e38..67677bbe 100644 --- a/qai_hub_models/models/swin_small/export.py +++ b/qai_hub_models/models/swin_small/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git 
a/qai_hub_models/models/swin_small/info.yaml b/qai_hub_models/models/swin_small/info.yaml index ac042fe2..2a22d62e 100644 --- a/qai_hub_models/models/swin_small/info.yaml +++ b/qai_hub_models/models/swin_small/info.yaml @@ -43,3 +43,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/swin_small/perf.yaml b/qai_hub_models/models/swin_small/perf.yaml index 3253371f..4bd928fb 100644 --- a/qai_hub_models/models/swin_small/perf.yaml +++ b/qai_hub_models/models/swin_small/perf.yaml @@ -36,11 +36,11 @@ models: - name: Swin-Small performance_metrics: - torchscript_onnx_tflite: - inference_time: 29371.0 - throughput: 34.047189404514654 + inference_time: 29054.0 + throughput: 34.41866868589523 estimated_peak_memory_range: - min: 57344 - max: 3729912 + min: 24576 + max: 7976680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jegneq8jg + job_id: jn5q92d7p job_status: Passed torchscript_onnx_qnn: - inference_time: 23540.0 - throughput: 42.48088360237893 + inference_time: 23697.0 + throughput: 42.19943452757733 estimated_peak_memory_range: - min: 49152 - max: 39448744 + min: 0 + max: 40982576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1246 - job_id: jqpyd200p + job_id: j1p3qmrx5 job_status: Passed torchscript_onnx_ort: - inference_time: 56967.0 - throughput: 17.55402250425685 + inference_time: 56535.0 + throughput: 17.688157778367383 estimated_peak_memory_range: - min: 180224 - max: 257939904 + min: 57344 + max: 250098192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1158 - job_id: j1glkqm2p + job_id: jlpe4vn75 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.193426Z' + timestamp: '2024-06-08T23:19:17Z' - torchscript_onnx_tflite: - inference_time: 19697.0 - throughput: 50.76915266284206 + inference_time: 19652.0 + throughput: 50.8854060655404 estimated_peak_memory_range: - min: 36864 - max: 467583248 + min: 45056 + max: 468730016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: joprydjkg + job_id: j1glekqep job_status: Passed torchscript_onnx_qnn: - inference_time: 16158.0 - throughput: 61.888847629657135 + inference_time: 16097.0 + throughput: 62.123377026775174 estimated_peak_memory_range: min: 0 - max: 376508304 + max: 371590576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1246 - job_id: j1p87kyq5 + job_id: jwgoev94p job_status: Passed torchscript_onnx_ort: - inference_time: 39073.0 - throughput: 25.593120569191 + inference_time: 39326.0 + throughput: 25.42846971469257 estimated_peak_memory_range: - min: 618496 - max: 172619200 + min: 651264 + max: 174791408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1158 - job_id: jw56104np + job_id: jygzv70zp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: 
'2024-05-29T18:59:43.193885Z' + timestamp: '2024-06-08T23:19:18Z' - torchscript_onnx_tflite: - inference_time: 29137.0 - throughput: 34.320623262518446 + inference_time: 29025.0 + throughput: 34.45305770887166 estimated_peak_memory_range: - min: 81920 - max: 3564816 + min: 69632 + max: 3142616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jep2mdn65 + job_id: jw56q10vg job_status: Passed torchscript_onnx_qnn: - inference_time: 23539.0 - throughput: 42.48268830451591 + inference_time: 23503.0 + throughput: 42.54775986044335 estimated_peak_memory_range: - min: 53248 - max: 40793896 + min: 36864 + max: 38372320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1246 - job_id: jn5q2dqe5 + job_id: j7gjkl875 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.194196Z' + timestamp: '2024-06-08T23:19:16Z' - torchscript_onnx_qnn: - inference_time: 23958.0 - throughput: 41.73971116119876 + inference_time: 23778.0 + throughput: 42.055681722600724 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1246 - job_id: jogkykxvp + job_id: j1pvzwn7g job_status: Passed torchscript_onnx_ort: - inference_time: 58912.0 - throughput: 16.97447039652363 + inference_time: 58093.0 + throughput: 17.213777907837432 estimated_peak_memory_range: - min: 467292160 - max: 467292160 + min: 385679360 + max: 385679360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1158 - job_id: j1p3mr0mg + job_id: jz5wm9rzg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.194465Z' + timestamp: '2024-06-08T23:19:19Z' diff --git a/qai_hub_models/models/swin_tiny/README.md b/qai_hub_models/models/swin_tiny/README.md index 8549a629..e0733e34 100644 --- a/qai_hub_models/models/swin_tiny/README.md +++ b/qai_hub_models/models/swin_tiny/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Swin-Tiny can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) diff --git a/qai_hub_models/models/swin_tiny/evaluate.py b/qai_hub_models/models/swin_tiny/evaluate.py new file mode 100644 index 00000000..5c7b00e7 --- /dev/null +++ b/qai_hub_models/models/swin_tiny/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.swin_tiny import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/swin_tiny/export.py b/qai_hub_models/models/swin_tiny/export.py index df0a32e2..fbe5734d 100644 --- a/qai_hub_models/models/swin_tiny/export.py +++ b/qai_hub_models/models/swin_tiny/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/swin_tiny/info.yaml b/qai_hub_models/models/swin_tiny/info.yaml index aee47f6a..9a83a696 100644 --- a/qai_hub_models/models/swin_tiny/info.yaml +++ b/qai_hub_models/models/swin_tiny/info.yaml @@ -43,3 +43,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/swin_tiny/perf.yaml b/qai_hub_models/models/swin_tiny/perf.yaml index 79d0a222..7281ba04 100644 --- a/qai_hub_models/models/swin_tiny/perf.yaml +++ b/qai_hub_models/models/swin_tiny/perf.yaml @@ -36,11 +36,11 @@ models: - name: Swin-Tiny performance_metrics: - torchscript_onnx_tflite: - inference_time: 17622.0 - throughput: 56.74724775848372 + inference_time: 17582.0 + throughput: 56.87635081333182 estimated_peak_memory_range: - min: 20480 - max: 3034712 + min: 49152 + max: 3052248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jlpevnm85 + job_id: jnp1q8mkg job_status: Passed torchscript_onnx_qnn: - inference_time: 14919.0 - throughput: 67.02862122126147 + inference_time: 14870.0 + throughput: 67.24949562878278 estimated_peak_memory_range: - min: 49152 - max: 37995888 + min: 40960 + max: 28468704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 700 - job_id: jmg94qnm5 + job_id: jqp4jw2qp job_status: Passed torchscript_onnx_ort: - inference_time: 34287.0 - throughput: 29.165572957680755 + inference_time: 33752.0 + throughput: 29.627873903768666 estimated_peak_memory_range: - min: 77824 - max: 165377480 + min: 0 + max: 143848064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 624 - job_id: jqp4w2r2g + job_id: jopr1ymvg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.229036Z' + timestamp: '2024-06-08T23:19:49Z' - torchscript_onnx_tflite: - inference_time: 11801.0 - throughput: 84.7385814761461 + inference_time: 11836.0 + throughput: 84.48800270361609 estimated_peak_memory_range: min: 40960 - max: 288481344 + max: 291213504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jygz70d4p + job_id: jvgd7vmkg job_status: Passed torchscript_onnx_qnn: - inference_time: 9948.0 - throughput: 100.52271813429836 + inference_time: 9960.0 + throughput: 100.40160642570281 estimated_peak_memory_range: min: 618496 - max: 230153840 + max: 226851856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 700 - job_id: jnp18mzng + job_id: j0pxe1zj5 job_status: Passed torchscript_onnx_ort: - inference_time: 23929.0 - throughput: 41.79029629320072 + inference_time: 23820.0 + throughput: 41.98152812762385 estimated_peak_memory_range: - min: 36864 - max: 111906272 + min: 53248 + max: 113324624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 624 - job_id: j0px1zo8g + job_id: jep23mqxg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.229264Z' + timestamp: '2024-06-08T23:19:50Z' - torchscript_onnx_tflite: - inference_time: 17581.0 - throughput: 56.87958591661453 + inference_time: 17413.0 + throughput: 57.42835812324125 estimated_peak_memory_range: - min: 90112 - max: 2519456 + min: 24576 + max: 3013416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jz5w9r64p + job_id: jz57vd8q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 14955.0 - throughput: 66.86726847208291 + inference_time: 14630.0 + throughput: 68.3526999316473 estimated_peak_memory_range: - min: 245760 - max: 27630984 + min: 12288 + max: 29408864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 700 - job_id: jz57d8rn5 + job_id: jegnredv5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.229422Z' + timestamp: '2024-06-08T23:19:48Z' - torchscript_onnx_qnn: - inference_time: 14657.0 - throughput: 68.22678583611926 + inference_time: 14162.0 + throughput: 70.61149555147578 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 700 - job_id: jvgdvm16g + job_id: jo5mvzly5 job_status: Passed torchscript_onnx_ort: - inference_time: 35485.0 - throughput: 28.180921516133576 + inference_time: 34948.0 + throughput: 28.613940711914847 estimated_peak_memory_range: - min: 241364992 - max: 241364992 + min: 211316736 + max: 211316736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 624 
-      job_id: jo5mzlx7p
+      job_id: jqpyvdkrp
       job_status: Passed
     reference_device_info:
       name: Snapdragon X Elite CRD
@@ -216,4 +216,4 @@ models:
       os_name: Windows
       manufacturer: Qualcomm
       chipset: Snapdragon® X Elite
-    timestamp: '2024-05-29T18:59:43.229561Z'
+    timestamp: '2024-06-08T23:19:50Z'
diff --git a/qai_hub_models/models/trocr/README.md b/qai_hub_models/models/trocr/README.md
index 8e1b963a..429f2e2f 100644
--- a/qai_hub_models/models/trocr/README.md
+++ b/qai_hub_models/models/trocr/README.md
@@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub.

 ## License
 - The license for the original implementation of TrOCR can be found [here](https://github.com/microsoft/unilm/blob/master/LICENSE).
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url})
+- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf)

 ## References
 * [TrOCR: Transformer-based Optical Character Recognition with Pre-trained Models](https://arxiv.org/abs/2109.10282)
diff --git a/qai_hub_models/models/trocr/export.py b/qai_hub_models/models/trocr/export.py
index 8b74261b..7c62002b 100644
--- a/qai_hub_models/models/trocr/export.py
+++ b/qai_hub_models/models/trocr/export.py
@@ -194,7 +194,7 @@ def export_model(
         target_runtime_extension = "so"
     elif target_runtime == TargetRuntime.TFLITE:
         target_runtime_extension = "tflite"
-    elif target_runtime == TargetRuntime.ORT:
+    elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}:
         target_runtime_extension = "onnx"

     os.makedirs(output_path, exist_ok=True)
@@ -237,7 +237,10 @@ def export_model(
 def main():
     warnings.filterwarnings("ignore")
     parser = export_parser(
-        model_cls=Model, components=ALL_COMPONENTS, supports_qnn=False
+        model_cls=Model,
+        components=ALL_COMPONENTS,
+        supports_qnn=False,
+        supports_precompiled_ort=False,
     )
     args = parser.parse_args()
     export_model(**vars(args))
diff --git a/qai_hub_models/models/trocr/perf.yaml b/qai_hub_models/models/trocr/perf.yaml
index 521a6a8a..25bfdd7d 100644
--- a/qai_hub_models/models/trocr/perf.yaml
+++ b/qai_hub_models/models/trocr/perf.yaml
@@ -36,11 +36,11 @@ models:
 - name: TrOCREncoder
   performance_metrics:
   - torchscript_onnx_tflite:
-      inference_time: 149720.0
-      throughput: 6.67913438418381
+      inference_time: 148428.0
+      throughput: 6.737273290753766
       estimated_peak_memory_range:
-        min: 7241728
-        max: 10559328
+        min: 6459392
+        max: 9952352
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -48,14 +48,14 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 592
-      job_id: jopry73kg
+      job_id: j1p8w7dzp
       job_status: Passed
     torchscript_onnx_ort:
-      inference_time: 111138.0
-      throughput: 8.997822526948479
+      inference_time: 109810.0
+      throughput: 9.106638739641198
       estimated_peak_memory_range:
-        min: 14254080
-        max: 129724112
+        min: 14303232
+        max: 127415872
9.00276384850149 estimated_peak_memory_range: - min: 5992448 - max: 348296944 + min: 6410240 + max: 350751520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 592 - job_id: jqpydy30p + job_id: jn5q92x7p job_status: Passed torchscript_onnx_ort: - inference_time: 84470.0 - throughput: 11.838522552385463 + inference_time: 83685.0 + throughput: 11.9495728027723 estimated_peak_memory_range: - min: 16490496 - max: 91372800 + min: 12636160 + max: 89203248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 396 - job_id: jygz7034p + job_id: jvgd7vykg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.263862Z' + timestamp: '2024-06-08T23:20:37Z' - torchscript_onnx_tflite: - inference_time: 149520.0 - throughput: 6.688068485821295 + inference_time: 148360.0 + throughput: 6.740361283364789 estimated_peak_memory_range: - min: 16384 - max: 12280096 + min: 7380992 + max: 9974128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 592 - job_id: j1p87kqq5 + job_id: jw56q19vg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.263936Z' + timestamp: '2024-06-08T23:20:26Z' - torchscript_onnx_ort: - inference_time: 111711.0 - throughput: 8.951669934026192 + inference_time: 109878.0 + throughput: 9.101002930522943 estimated_peak_memory_range: - min: 36442112 - max: 36442112 + min: 28672 + max: 28672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 396 - job_id: jmg94qlm5 + job_id: jmg9947vg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,15 +156,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.264003Z' + timestamp: '2024-06-08T23:20:38Z' - name: TrOCRDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 2734.0 - throughput: 365.764447695684 + inference_time: 2732.0 + throughput: 366.03221083455344 estimated_peak_memory_range: - min: 16384 - max: 2242304 + min: 12288 + max: 2455200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -172,14 +172,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 370 - job_id: jep2mzy65 + job_id: jogkrywy5 job_status: Passed torchscript_onnx_ort: - inference_time: 2986.0 - throughput: 334.8961821835231 + inference_time: 2915.0 + throughput: 343.0531732418525 estimated_peak_memory_range: - min: 122880 - max: 562369864 + min: 28672 + max: 588384064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -187,7 +187,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 353 - job_id: jlpevnd85 + job_id: jnp1q8kkg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -196,13 +196,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.264092Z' + timestamp: '2024-06-08T23:20:35Z' - torchscript_onnx_tflite: - inference_time: 1972.0 - throughput: 507.0993914807302 + inference_time: 1997.0 + throughput: 500.75112669003505 estimated_peak_memory_range: min: 12288 - max: 193171136 + max: 
195170736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -210,14 +210,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 370 - job_id: j2p0rxz0p + job_id: j1glek9ep job_status: Passed torchscript_onnx_ort: - inference_time: 2087.0 - throughput: 479.1566842357451 + inference_time: 2106.0 + throughput: 474.8338081671415 estimated_peak_memory_range: min: 0 - max: 47494016 + max: 49553392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -225,7 +225,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 353 - job_id: jz5w9re4p + job_id: jz5wm90jg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -234,13 +234,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.264179Z' + timestamp: '2024-06-08T23:20:37Z' - torchscript_onnx_tflite: - inference_time: 2738.0 - throughput: 365.23009495982467 + inference_time: 2737.0 + throughput: 365.36353671903544 estimated_peak_memory_range: min: 16384 - max: 2113536 + max: 3465512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -248,7 +248,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 370 - job_id: jogkykevp + job_id: j1p3qmlx5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -257,13 +257,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.264226Z' + timestamp: '2024-06-08T23:20:27Z' - torchscript_onnx_ort: - inference_time: 2608.0 - throughput: 383.4355828220859 + inference_time: 2812.0 + throughput: 355.6187766714082 estimated_peak_memory_range: - min: 356294656 - max: 356294656 + min: 352550912 + max: 352550912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -271,7 +271,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 353 - job_id: jnp18m4ng + job_id: jnp1q8klg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -280,4 +280,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.264271Z' + timestamp: '2024-06-08T23:20:39Z' diff --git a/qai_hub_models/models/unet_segmentation/README.md b/qai_hub_models/models/unet_segmentation/README.md index 78dfce4f..c0d3d342 100644 --- a/qai_hub_models/models/unet_segmentation/README.md +++ b/qai_hub_models/models/unet_segmentation/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Unet-Segmentation can be found [here](https://github.com/milesial/Pytorch-UNet/blob/master/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/milesial/Pytorch-UNet/blob/master/LICENSE) ## References * [U-Net: Convolutional Networks for Biomedical Image Segmentation](https://arxiv.org/abs/1505.04597) diff --git a/qai_hub_models/models/unet_segmentation/export.py b/qai_hub_models/models/unet_segmentation/export.py index 27cd31cb..6274534f 100644 --- a/qai_hub_models/models/unet_segmentation/export.py +++ b/qai_hub_models/models/unet_segmentation/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/unet_segmentation/perf.yaml b/qai_hub_models/models/unet_segmentation/perf.yaml index 51abe949..b4a6ea65 100644 --- a/qai_hub_models/models/unet_segmentation/perf.yaml +++ b/qai_hub_models/models/unet_segmentation/perf.yaml @@ -36,11 +36,11 @@ models: - name: Unet-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 155816.0 - throughput: 6.417826153925143 + inference_time: 159228.0 + throughput: 6.280302459366443 estimated_peak_memory_range: - min: 6438912 - max: 229049848 + min: 6418432 + max: 111435960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jqp4w2l2g + job_id: jo5mvz7q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 150601.0 - throughput: 6.640062150981733 + inference_time: 156519.0 + throughput: 6.389000696401076 estimated_peak_memory_range: - min: 10506240 - max: 32628664 + min: 9871360 + max: 31082800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 51 - job_id: jegnew6jg + job_id: jep23m1mg job_status: Passed torchscript_onnx_ort: - inference_time: 160595.0 - throughput: 6.226843924157041 + inference_time: 165647.0 + throughput: 6.03693396197939 estimated_peak_memory_range: - min: 7450624 - max: 152630040 + min: 13611008 + max: 154509064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: j2p0rx40p + job_id: jn5q92nmp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.309399Z' + timestamp: '2024-06-08T23:21:28Z' - torchscript_onnx_tflite: - inference_time: 120918.0 - throughput: 8.270067318347971 + inference_time: 121153.0 + throughput: 8.254025901133279 estimated_peak_memory_range: - min: 5656576 - max: 336826176 + min: 6619136 + max: 339596672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j0px1zk8g + job_id: jegnre4m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 110216.0 - throughput: 9.073092835885896 + inference_time: 110026.0 + throughput: 9.0887608383473 estimated_peak_memory_range: - min: 9908224 - max: 92426640 + min: 9850880 + max: 91369248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 51 - job_id: jopry7vkg + job_id: j2p0erwe5 job_status: Passed torchscript_onnx_ort: - inference_time: 120104.0 - throughput: 8.326117364950376 + inference_time: 119057.0 + throughput: 8.399338132155187 estimated_peak_memory_range: - min: 22466560 - max: 102883024 + min: 22478848 + max: 104785056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: j1p87k2q5 + job_id: j1glekdlp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.309447Z' + timestamp: '2024-06-08T23:21:30Z' - torchscript_onnx_tflite: - inference_time: 169891.0 - throughput: 5.886126987303624 + inference_time: 157133.0 + throughput: 6.364035562230722 estimated_peak_memory_range: - min: 4673536 - max: 9033328 + min: 6680576 + max: 111633312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jo5mzln7p + job_id: jopr1yreg job_status: Passed torchscript_onnx_qnn: - inference_time: 154323.0 - throughput: 6.479915501901855 + inference_time: 148329.0 + throughput: 6.741769984291676 estimated_peak_memory_range: - min: 10018816 - max: 32111504 + min: 9969664 + max: 32982776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 51 - job_id: jqpydy10p + job_id: jogkry1o5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.309477Z' + timestamp: '2024-06-08T23:21:27Z' - torchscript_onnx_qnn: - inference_time: 190382.0 - throughput: 5.252597409418958 + inference_time: 190476.0 + throughput: 5.25000525000525 estimated_peak_memory_range: - min: 9850880 - max: 9850880 + min: 9854976 + max: 9854976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 51 - job_id: jep2mzk65 + job_id: j1p8w7n8p job_status: Passed torchscript_onnx_ort: - inference_time: 146588.0 - throughput: 6.821840805522962 + inference_time: 146401.0 + throughput: 6.830554436103578 estimated_peak_memory_range: - min: 11423744 - max: 11423744 + min: 17457152 + max: 17457152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jogkykvvp + job_id: jw56q1x7g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.309509Z' + timestamp: '2024-06-08T23:21:31Z' diff --git a/qai_hub_models/models/vit/README.md b/qai_hub_models/models/vit/README.md index 314d20ef..924b05f0 100644 --- a/qai_hub_models/models/vit/README.md +++ b/qai_hub_models/models/vit/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of VIT can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) diff --git a/qai_hub_models/models/vit/evaluate.py b/qai_hub_models/models/vit/evaluate.py new file mode 100644 index 00000000..91e37600 --- /dev/null +++ b/qai_hub_models/models/vit/evaluate.py @@ -0,0 +1,56 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.vit import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + supports_qnn=False, + supports_precompiled_ort=False, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/vit/export.py b/qai_hub_models/models/vit/export.py index de6eba4d..a4f94916 100644 --- a/qai_hub_models/models/vit/export.py +++ b/qai_hub_models/models/vit/export.py @@ -190,7 +190,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -219,7 +219,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/vit/info.yaml b/qai_hub_models/models/vit/info.yaml index 6667f41f..ac7afa54 100644 --- a/qai_hub_models/models/vit/info.yaml +++ b/qai_hub_models/models/vit/info.yaml @@ -42,3 +42,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/vit/perf.yaml b/qai_hub_models/models/vit/perf.yaml index 2de2633f..459d6f2d 100644 --- a/qai_hub_models/models/vit/perf.yaml +++ 
b/qai_hub_models/models/vit/perf.yaml @@ -36,11 +36,11 @@ models: - name: VIT performance_metrics: - torchscript_onnx_tflite: - inference_time: 79254.0 - throughput: 12.617659676483205 + inference_time: 78496.0 + throughput: 12.73950264981655 estimated_peak_memory_range: - min: 139264 - max: 3176768 + min: 102400 + max: 3437176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 535 - job_id: j1glkq42p + job_id: jwgoevxdp job_status: Passed torchscript_onnx_ort: - inference_time: 104122.0 - throughput: 9.604118245903843 + inference_time: 103100.0 + throughput: 9.699321047526674 estimated_peak_memory_range: - min: 32768 - max: 419050688 + min: 110592 + max: 441770400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 376 - job_id: jygz7024p + job_id: jnp1q89lg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.343795Z' + timestamp: '2024-06-08T23:22:03Z' - torchscript_onnx_tflite: - inference_time: 56896.0 - throughput: 17.575928008998876 + inference_time: 56654.0 + throughput: 17.65100434214707 estimated_peak_memory_range: - min: 114688 - max: 373059376 + min: 77824 + max: 375276272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 535 - job_id: jw56102np + job_id: j1pvzw8mg job_status: Passed torchscript_onnx_ort: - inference_time: 75468.0 - throughput: 13.250649281814809 + inference_time: 76545.0 + throughput: 13.064210595074792 estimated_peak_memory_range: - min: 622592 - max: 510131728 + min: 684032 + max: 513094432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 376 - job_id: jz5w9rw4p + job_id: jvgd7vklg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.343903Z' + timestamp: '2024-06-08T23:22:03Z' - torchscript_onnx_tflite: - inference_time: 79120.0 - throughput: 12.639029322548028 + inference_time: 78627.0 + throughput: 12.718277436504 estimated_peak_memory_range: - min: 131072 - max: 3222496 + min: 110592 + max: 6215968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 535 - job_id: j1p3mrnmg + job_id: j7gjkl985 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.343996Z' + timestamp: '2024-06-08T23:21:57Z' - torchscript_onnx_ort: - inference_time: 103336.0 - throughput: 9.677169621429124 + inference_time: 102862.0 + throughput: 9.721763138962883 estimated_peak_memory_range: - min: 186150912 - max: 186150912 + min: 158560256 + max: 158560256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 376 - job_id: jmg94q0m5 + job_id: jz57vdqr5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.344048Z' + timestamp: '2024-06-08T23:22:04Z' diff --git 
a/qai_hub_models/models/whisper_base_en/README.md b/qai_hub_models/models/whisper_base_en/README.md index d751e49d..441351db 100644 --- a/qai_hub_models/models/whisper_base_en/README.md +++ b/qai_hub_models/models/whisper_base_en/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Whisper-Base-En can be found [here](https://github.com/openai/whisper/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Robust Speech Recognition via Large-Scale Weak Supervision](https://cdn.openai.com/papers/whisper.pdf) diff --git a/qai_hub_models/models/whisper_base_en/export.py b/qai_hub_models/models/whisper_base_en/export.py index 095bc0e3..4bb6b358 100644 --- a/qai_hub_models/models/whisper_base_en/export.py +++ b/qai_hub_models/models/whisper_base_en/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/whisper_base_en/perf.yaml b/qai_hub_models/models/whisper_base_en/perf.yaml index ecd7e150..881707cf 100644 --- a/qai_hub_models/models/whisper_base_en/perf.yaml +++ b/qai_hub_models/models/whisper_base_en/perf.yaml @@ -36,11 +36,11 @@ models: - name: WhisperEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 159634.0 - throughput: 6.264329654083716 + inference_time: 158811.0 + throughput: 6.296793043303046 estimated_peak_memory_range: - min: 30449664 - max: 132588528 + min: 31092736 + max: 131633968 primary_compute_unit: GPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 419 layers_on_cpu: 0 total_layers: 419 - job_id: jvgdvmn6g + job_id: j0pxe1w95 job_status: Passed torchscript_onnx_qnn: - inference_time: 605553.0 - throughput: 1.6513831159287462 + inference_time: 624615.0 + throughput: 1.6009862075038224 estimated_peak_memory_range: - min: 1105920 - max: 76836912 + min: 131072 + max: 82142360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 580 - job_id: jqp4w2nqg + job_id: j2p0erne5 job_status: Passed torchscript_onnx_ort: - inference_time: 429986.0 - throughput: 2.3256571144176785 + inference_time: 394348.0 + throughput: 2.5358312962155254 estimated_peak_memory_range: - min: 73736192 - max: 249023480 + min: 4792320 + max: 165488160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 380 - job_id: j1p87k0z5 + job_id: j1pvzwjmg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.369854Z' + timestamp: '2024-06-08T23:22:51Z' - torchscript_onnx_tflite: - inference_time: 123349.0 - throughput: 8.107078290055046 + inference_time: 122023.0 + throughput: 8.195176319218508 estimated_peak_memory_range: - min: 17702912 - max: 63075712 + min: 37249024 
+ max: 82154976 primary_compute_unit: GPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 419 layers_on_cpu: 0 total_layers: 419 - job_id: jmg94q0q5 + job_id: jegnrejm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 457237.0 - throughput: 2.187049604472079 + inference_time: 452457.0 + throughput: 2.210154777139043 estimated_peak_memory_range: min: 0 - max: 198167856 + max: 198495008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 580 - job_id: jo5mzleyp + job_id: jogkryjo5 job_status: Passed torchscript_onnx_ort: - inference_time: 302992.0 - throughput: 3.3004171727306333 + inference_time: 300384.0 + throughput: 3.3290721210184295 estimated_peak_memory_range: - min: 74620928 - max: 277528096 + min: 62181376 + max: 262749552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 380 - job_id: jn5q2de75 + job_id: jlpe4vj05 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.370015Z' + timestamp: '2024-06-08T23:22:52Z' - torchscript_onnx_tflite: - inference_time: 158470.0 - throughput: 6.310342651605982 + inference_time: 158001.0 + throughput: 6.329073866621098 estimated_peak_memory_range: min: 12288 - max: 93808464 + max: 104601560 primary_compute_unit: GPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 419 layers_on_cpu: 0 total_layers: 419 - job_id: jvgdvmnkg + job_id: jep23m2mg job_status: Passed torchscript_onnx_qnn: - inference_time: 632689.0 - throughput: 1.58055537554786 + inference_time: 623834.0 + throughput: 1.602990539149838 estimated_peak_memory_range: - min: 77824 - max: 80008712 + min: 139264 + max: 76510216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 580 - job_id: jqpydyrrp + job_id: j1p3qmyz5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.370125Z' + timestamp: '2024-06-08T23:22:49Z' - torchscript_onnx_qnn: - inference_time: 463047.0 - throughput: 2.159607987958026 + inference_time: 454926.0 + throughput: 2.198159700698575 estimated_peak_memory_range: min: 962560 max: 962560 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 579 - job_id: jopry78vg + job_id: j1glekjlp job_status: Passed torchscript_onnx_ort: - inference_time: 389738.0 - throughput: 2.5658262730347055 + inference_time: 383597.0 + throughput: 2.606902556589337 estimated_peak_memory_range: - min: 138715136 - max: 138715136 + min: 139669504 + max: 139669504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 380 - job_id: jw5610evp + job_id: jz5wm9jjg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,15 +216,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.370231Z' + timestamp: '2024-06-08T23:22:54Z' - name: WhisperDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 24968.0 - throughput: 40.05126561999359 + inference_time: 24389.0 + throughput: 41.00209110664644 estimated_peak_memory_range: - min: 5763072 - max: 
8595680 + min: 5771264 + max: 8649416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: jz5w9rwzp + job_id: jo5mvzjq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 23886.0 - throughput: 41.86552792430712 + inference_time: 22769.0 + throughput: 43.91936404760859 estimated_peak_memory_range: - min: 42430464 - max: 59718408 + min: 42414080 + max: 60923784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -247,14 +247,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: j0px1z9jg + job_id: j1p8w7l8p job_status: Passed torchscript_onnx_ort: - inference_time: 24467.0 - throughput: 40.87137777414477 + inference_time: 24751.0 + throughput: 40.402407983515815 estimated_peak_memory_range: - min: 20480 - max: 320053784 + min: 12656640 + max: 328987984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -262,7 +262,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 844 - job_id: jogkyk7yp + job_id: j7gjklj85 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -271,13 +271,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.370494Z' + timestamp: '2024-06-08T23:22:51Z' - torchscript_onnx_tflite: - inference_time: 19456.0 - throughput: 51.39802631578947 + inference_time: 18854.0 + throughput: 53.039142887450936 estimated_peak_memory_range: - min: 4390912 - max: 91122240 + min: 4575232 + max: 93812240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,14 +285,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: jnp18m2kg + job_id: jopr1yzeg job_status: Passed torchscript_onnx_qnn: - inference_time: 18796.0 - throughput: 53.20280910832092 + inference_time: 18709.0 + throughput: 53.450211128333954 estimated_peak_memory_range: - min: 42418176 - max: 323686720 + min: 42438656 + max: 323848592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -300,14 +300,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: jegnewlvg + job_id: jn5q92jmp job_status: Passed torchscript_onnx_ort: - inference_time: 20598.0 - throughput: 48.548402757549276 + inference_time: 20257.0 + throughput: 49.36565137976996 estimated_peak_memory_range: - min: 52908032 - max: 139842400 + min: 52916224 + max: 140494080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -315,7 +315,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 844 - job_id: j1glkq6ep + job_id: jygzv716p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -324,13 +324,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.370757Z' + timestamp: '2024-06-08T23:22:53Z' - torchscript_onnx_tflite: - inference_time: 23198.0 - throughput: 43.10716441072506 + inference_time: 23324.0 + throughput: 42.87429257417253 estimated_peak_memory_range: - min: 5951488 - max: 11437768 + min: 5750784 + max: 9075392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -338,14 +338,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: jz57d82q5 + job_id: jqpyvd94p job_status: Passed torchscript_onnx_qnn: - inference_time: 23510.0 - throughput: 42.53509145044662 + inference_time: 24053.0 + throughput: 41.57485552737704 estimated_peak_memory_range: - min: 42409984 - max: 58496416 + min: 42450944 + max: 59016968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -353,7 +353,7 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: j2p0rx32p + job_id: jwgoevjdp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -362,13 +362,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.370934Z' + timestamp: '2024-06-08T23:22:49Z' - torchscript_onnx_qnn: - inference_time: 13714.0 - throughput: 72.91818579553741 + inference_time: 13816.0 + throughput: 72.37984944991314 estimated_peak_memory_range: - min: 42463232 - max: 42463232 + min: 42455040 + max: 42455040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -376,14 +376,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: jep2mz0x5 + job_id: jw56q1k7g job_status: Passed torchscript_onnx_ort: - inference_time: 19806.0 - throughput: 50.48975058063213 + inference_time: 20016.0 + throughput: 49.96003197442047 estimated_peak_memory_range: - min: 112349184 - max: 112349184 + min: 45969408 + max: 45969408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -391,7 +391,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 844 - job_id: j1p3mrvxg + job_id: jmg9946vg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -400,4 +400,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.371105Z' + timestamp: '2024-06-08T23:22:55Z' diff --git a/qai_hub_models/models/whisper_base_en/requirements.txt b/qai_hub_models/models/whisper_base_en/requirements.txt index 75b1cf12..fa34d4f8 100644 --- a/qai_hub_models/models/whisper_base_en/requirements.txt +++ b/qai_hub_models/models/whisper_base_en/requirements.txt @@ -1,2 +1,2 @@ openai-whisper==20230314 -scipy +scipy==1.8.1 diff --git a/qai_hub_models/models/whisper_small_en/README.md b/qai_hub_models/models/whisper_small_en/README.md index f0f96498..5a1422a0 100644 --- a/qai_hub_models/models/whisper_small_en/README.md +++ b/qai_hub_models/models/whisper_small_en/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Whisper-Small-En can be found [here](https://github.com/openai/whisper/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Robust Speech Recognition via Large-Scale Weak Supervision](https://cdn.openai.com/papers/whisper.pdf) diff --git a/qai_hub_models/models/whisper_small_en/export.py b/qai_hub_models/models/whisper_small_en/export.py index dc34702d..e6937074 100644 --- a/qai_hub_models/models/whisper_small_en/export.py +++ b/qai_hub_models/models/whisper_small_en/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/whisper_small_en/perf.yaml b/qai_hub_models/models/whisper_small_en/perf.yaml index d14f6b03..aae6bde6 100644 --- a/qai_hub_models/models/whisper_small_en/perf.yaml +++ b/qai_hub_models/models/whisper_small_en/perf.yaml @@ -36,11 +36,11 @@ models: - name: WhisperEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 617428.0 - throughput: 1.6196220449995788 + inference_time: 610635.0 + throughput: 1.6376395064154528 estimated_peak_memory_range: - min: 48623616 - max: 497398832 + min: 8286208 + max: 437557824 primary_compute_unit: GPU precision: fp16 layer_info: @@ -48,22 +48,22 @@ models: layers_on_gpu: 911 layers_on_cpu: 0 total_layers: 911 - job_id: j7gjl8z7p + job_id: jz57vdzr5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1710031.0 - throughput: 0.5847847202770008 + torchscript_onnx_qnn: + inference_time: 1969063.0 + throughput: 0.5078557669307686 estimated_peak_memory_range: - min: 110481408 - max: 507852736 + min: 1097728 + max: 226008440 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 884 + layers_on_npu: 1474 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 884 - job_id: jqpydyjrp + total_layers: 1474 + job_id: jep23m8mg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.470469Z' + timestamp: '2024-06-08T23:24:00Z' - torchscript_onnx_tflite: - inference_time: 471828.0 - throughput: 2.119416397500784 + inference_time: 467725.0 + throughput: 2.1380084451333583 estimated_peak_memory_range: - min: 110387200 - max: 210216224 + min: 111644672 + max: 209573760 primary_compute_unit: GPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 911 layers_on_cpu: 0 total_layers: 911 - job_id: jygz70ozp + job_id: j0pxe1v95 job_status: Passed torchscript_onnx_qnn: - inference_time: 1470022.0 - throughput: 0.6802619280527774 + inference_time: 1435234.0 + throughput: 0.6967504950412268 estimated_peak_memory_range: min: 0 - max: 568505552 + max: 570396624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1474 - job_id: jqp4w2kqg + job_id: j2p0erye5 job_status: Passed torchscript_onnx_ort: - inference_time: 1253665.0 - throughput: 0.7976612571939075 + inference_time: 1240429.0 + throughput: 0.8061727031535058 estimated_peak_memory_range: - min: 111489024 - max: 
678294336 + min: 350531584 + max: 914876112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,7 +116,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 884 - job_id: j1p87kmz5 + job_id: j1pvzw3mg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -125,13 +125,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.470810Z' + timestamp: '2024-06-08T23:24:10Z' - torchscript_onnx_tflite: - inference_time: 610437.0 - throughput: 1.6381706875566193 + inference_time: 611130.0 + throughput: 1.6363130594145272 estimated_peak_memory_range: - min: 0 - max: 449077216 + min: 68825088 + max: 504071032 primary_compute_unit: GPU precision: fp16 layer_info: @@ -139,7 +139,7 @@ models: layers_on_gpu: 911 layers_on_cpu: 0 total_layers: 911 - job_id: jmg94qjq5 + job_id: jegnre2m5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -148,10 +148,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.470911Z' + timestamp: '2024-06-08T23:23:58Z' - torchscript_onnx_qnn: - inference_time: 1702121.0 - throughput: 0.5875022986027433 + inference_time: 1682160.0 + throughput: 0.5944737718171874 estimated_peak_memory_range: min: 962560 max: 962560 @@ -162,14 +162,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1473 - job_id: jo5mzlqyp + job_id: jogkryzo5 job_status: Passed torchscript_onnx_ort: - inference_time: 1516833.0 - throughput: 0.6592683571625881 + inference_time: 1497981.0 + throughput: 0.667565209438571 estimated_peak_memory_range: - min: 74731520 - max: 74731520 + min: 555839488 + max: 555839488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -177,7 +177,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 884 - job_id: jn5q2dr75 + job_id: jlpe4v905 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -186,15 +186,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.471142Z' + timestamp: '2024-06-08T23:24:12Z' - name: WhisperDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 26398.0 - throughput: 37.88165770134101 + inference_time: 26644.0 + throughput: 37.53190211679928 estimated_peak_memory_range: - min: 16756736 - max: 20661456 + min: 16855040 + max: 20865456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -202,14 +202,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2573 - job_id: jlpevne75 + job_id: jqp4jwqlp job_status: Passed torchscript_onnx_qnn: - inference_time: 25326.0 - throughput: 39.48511411197978 + inference_time: 24731.0 + throughput: 40.43508147668918 estimated_peak_memory_range: - min: 132386816 - max: 205732832 + min: 124076032 + max: 200059296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -217,22 +217,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: jz57d80q5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 62664.0 - throughput: 15.958125877696924 - estimated_peak_memory_range: - min: 53420032 - max: 713755144 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2302 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2302 - job_id: j2p0rx22p + job_id: jqpyvde4p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -241,13 +226,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.471830Z' + timestamp: 
'2024-06-08T23:24:00Z' - torchscript_onnx_tflite: - inference_time: 20598.0 - throughput: 48.548402757549276 + inference_time: 19793.0 + throughput: 50.52291214065579 estimated_peak_memory_range: - min: 16793600 - max: 1152170896 + min: 16777216 + max: 1154461280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -255,14 +240,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2573 - job_id: jz5w9r2zp + job_id: jo5mvzrq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 19885.0 - throughput: 50.28916268544129 + inference_time: 19453.0 + throughput: 51.40595280933532 estimated_peak_memory_range: - min: 12111872 - max: 804591888 + min: 72151040 + max: 864487680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -270,14 +255,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: j0px1znjg + job_id: j1p8w7o8p job_status: Passed torchscript_onnx_ort: - inference_time: 53383.0 - throughput: 18.732555307869546 + inference_time: 53273.0 + throughput: 18.77123495954799 estimated_peak_memory_range: - min: 90411008 - max: 358314384 + min: 50139136 + max: 319234896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,7 +270,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2302 - job_id: jogkykqyp + job_id: j7gjklx85 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -294,13 +279,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.472545Z' - - torchscript_onnx_qnn: - inference_time: 25743.0 - throughput: 38.845511401157594 + timestamp: '2024-06-08T23:24:11Z' + - torchscript_onnx_tflite: + inference_time: 27029.0 + throughput: 36.997299197158604 + estimated_peak_memory_range: + min: 16769024 + max: 20284792 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 2573 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 2573 + job_id: jopr1ykeg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 25818.0 + throughput: 38.73266713145867 estimated_peak_memory_range: - min: 127111168 - max: 202162640 + min: 127201280 + max: 197556544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -308,7 +308,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: jep2mz9x5 + job_id: jw56q167g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -317,13 +317,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.472758Z' + timestamp: '2024-06-08T23:24:07Z' - torchscript_onnx_qnn: - inference_time: 20785.0 - throughput: 48.11161895597787 + inference_time: 20402.0 + throughput: 49.01480247034605 estimated_peak_memory_range: - min: 127361024 - max: 127361024 + min: 127381504 + max: 127381504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -331,14 +331,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: jegnewmvg + job_id: jn5q928mp job_status: Passed torchscript_onnx_ort: - inference_time: 53319.0 - throughput: 18.7550404171121 + inference_time: 53485.0 + throughput: 18.696830887164626 estimated_peak_memory_range: - min: 309313536 - max: 309313536 + min: 342065152 + max: 342065152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -346,7 +346,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2302 - job_id: j1glkq2ep + job_id: jygzv7e6p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -355,4 +355,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite 
- timestamp: '2024-05-29T18:59:43.473190Z' + timestamp: '2024-06-08T23:24:13Z' diff --git a/qai_hub_models/models/whisper_tiny_en/README.md b/qai_hub_models/models/whisper_tiny_en/README.md index e541696e..2ce1b0c2 100644 --- a/qai_hub_models/models/whisper_tiny_en/README.md +++ b/qai_hub_models/models/whisper_tiny_en/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Whisper-Tiny-En can be found [here](https://github.com/openai/whisper/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Robust Speech Recognition via Large-Scale Weak Supervision](https://cdn.openai.com/papers/whisper.pdf) diff --git a/qai_hub_models/models/whisper_tiny_en/export.py b/qai_hub_models/models/whisper_tiny_en/export.py index 4c2d1226..050e09fa 100644 --- a/qai_hub_models/models/whisper_tiny_en/export.py +++ b/qai_hub_models/models/whisper_tiny_en/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/whisper_tiny_en/perf.yaml b/qai_hub_models/models/whisper_tiny_en/perf.yaml index 08635966..cf5d7cdb 100644 --- a/qai_hub_models/models/whisper_tiny_en/perf.yaml +++ b/qai_hub_models/models/whisper_tiny_en/perf.yaml @@ -36,11 +36,11 @@ models: - name: WhisperEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 68848.0 - throughput: 14.524750174297003 + inference_time: 68470.0 + throughput: 14.604936468526361 estimated_peak_memory_range: - min: 12288 - max: 48238640 + min: 16613376 + max: 64496288 primary_compute_unit: GPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 271 layers_on_cpu: 0 total_layers: 271 - job_id: jwgov9n45 + job_id: jnp1q80lg job_status: Passed torchscript_onnx_qnn: - inference_time: 287627.0 - throughput: 3.4767250640586593 + inference_time: 286944.0 + throughput: 3.485000557600089 estimated_peak_memory_range: - min: 1114112 - max: 47243920 + min: 1019904 + max: 52873616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jmg94qyq5 + job_id: jegnreym5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.534187Z' + timestamp: '2024-06-08T23:24:44Z' - torchscript_onnx_tflite: - inference_time: 53307.0 - throughput: 18.75926238580299 + inference_time: 54112.0 + throughput: 18.48018923713779 estimated_peak_memory_range: min: 0 - max: 32147600 + max: 36724816 primary_compute_unit: GPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 271 layers_on_cpu: 0 total_layers: 271 - job_id: j7gjl827p + job_id: jz57vdwr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 217027.0 - throughput: 4.607721619890612 + inference_time: 218003.0 + throughput: 4.587092838171952 
estimated_peak_memory_range: - min: 974848 - max: 136277600 + min: 406650880 + max: 543573456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jvgdvmqkg + job_id: jep23m6mg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.534272Z' + timestamp: '2024-06-08T23:24:46Z' - torchscript_onnx_tflite: - inference_time: 68587.0 - throughput: 14.580022453234578 + inference_time: 68514.0 + throughput: 14.595557112414982 estimated_peak_memory_range: - min: 12288 - max: 47889640 + min: 18030592 + max: 66868584 primary_compute_unit: GPU precision: fp16 layer_info: @@ -124,14 +124,14 @@ models: layers_on_gpu: 271 layers_on_cpu: 0 total_layers: 271 - job_id: jygz70jzp + job_id: j0pxe1j95 job_status: Passed torchscript_onnx_qnn: - inference_time: 293684.0 - throughput: 3.405020362021765 + inference_time: 288936.0 + throughput: 3.4609740565384723 estimated_peak_memory_range: - min: 57344 - max: 51548904 + min: 159744 + max: 53294424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -139,7 +139,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jo5mzl6yp + job_id: jogkryno5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -148,10 +148,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.534349Z' + timestamp: '2024-06-08T23:24:50Z' - torchscript_onnx_qnn: - inference_time: 239161.0 - throughput: 4.181283737733159 + inference_time: 237871.0 + throughput: 4.203959288858247 estimated_peak_memory_range: min: 962560 max: 962560 @@ -162,7 +162,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: jqp4w2dqg + job_id: j2p0erqe5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -171,15 +171,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.534395Z' + timestamp: '2024-06-08T23:24:48Z' - name: WhisperDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 3779.0 - throughput: 264.6202699126753 + inference_time: 3853.0 + throughput: 259.53802232026993 estimated_peak_memory_range: - min: 2977792 - max: 7682616 + min: 2973696 + max: 6011536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -187,14 +187,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: j1pvwnr7g + job_id: jvgd7vwlg job_status: Passed torchscript_onnx_qnn: - inference_time: 3638.0 - throughput: 274.8763056624519 + inference_time: 3672.0 + throughput: 272.33115468409585 estimated_peak_memory_range: - min: 9932800 - max: 46815096 + min: 21250048 + max: 48536944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -202,14 +202,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: jnp18mwkg + job_id: jopr1yqeg job_status: Passed torchscript_onnx_ort: - inference_time: 5435.0 - throughput: 183.99264029438822 + inference_time: 5299.0 + throughput: 188.71485185884129 estimated_peak_memory_range: - min: 6512640 - max: 215990552 + min: 6336512 + max: 214237680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -217,7 +217,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 462 - job_id: jep2mzlx5 + job_id: jw56q1j7g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -226,13 +226,13 @@ models: os_name: Android 
manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.534555Z' + timestamp: '2024-06-08T23:24:52Z' - torchscript_onnx_tflite: - inference_time: 3165.0 - throughput: 315.955766192733 + inference_time: 2973.0 + throughput: 336.3605785401951 estimated_peak_memory_range: - min: 2949120 - max: 226145904 + min: 942080 + max: 226696352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -240,14 +240,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jlpevnw75 + job_id: jqp4jwolp job_status: Passed torchscript_onnx_qnn: - inference_time: 2774.0 - throughput: 360.49026676279743 + inference_time: 2764.0 + throughput: 361.794500723589 estimated_peak_memory_range: min: 0 - max: 135656672 + max: 138707216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -255,14 +255,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: jz57d8lq5 + job_id: jqpyvdw4p job_status: Passed torchscript_onnx_ort: - inference_time: 4316.0 - throughput: 231.69601482854495 + inference_time: 4502.0 + throughput: 222.1235006663705 estimated_peak_memory_range: - min: 27541504 - max: 88893920 + min: 27127808 + max: 85392304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -270,7 +270,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 462 - job_id: j2p0rxl2p + job_id: jwgoev0dp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -279,13 +279,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.534716Z' + timestamp: '2024-06-08T23:24:54Z' - torchscript_onnx_tflite: - inference_time: 3880.0 - throughput: 257.7319587628866 + inference_time: 3909.0 + throughput: 255.81990278843693 estimated_peak_memory_range: - min: 159744 - max: 2928728 + min: 2981888 + max: 5533208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -293,14 +293,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jz5w9r3zp + job_id: jo5mvz2q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3676.0 - throughput: 272.0348204570185 + inference_time: 3717.0 + throughput: 269.03416733925206 estimated_peak_memory_range: - min: 21233664 - max: 48029776 + min: 21213184 + max: 37347800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -308,7 +308,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: jegnew3vg + job_id: jn5q92kmp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -317,13 +317,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.534826Z' + timestamp: '2024-06-08T23:24:50Z' - torchscript_onnx_qnn: - inference_time: 3678.0 - throughput: 271.8868950516585 + inference_time: 3772.0 + throughput: 265.11134676564154 estimated_peak_memory_range: - min: 21233664 - max: 21233664 + min: 21229568 + max: 21229568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -331,14 +331,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: j0px1z6jg + job_id: j1p8w798p job_status: Passed torchscript_onnx_ort: - inference_time: 4599.0 - throughput: 217.43857360295718 + inference_time: 4450.0 + throughput: 224.7191011235955 estimated_peak_memory_range: - min: 18477056 - max: 18477056 + min: 19857408 + max: 19857408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -346,7 +346,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 462 - job_id: jogkyk3yp + job_id: j7gjklm85 job_status: Passed reference_device_info: name: Snapdragon 
X Elite CRD @@ -355,4 +355,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.534928Z' + timestamp: '2024-06-08T23:24:56Z' diff --git a/qai_hub_models/models/whisper_tiny_en/requirements.txt b/qai_hub_models/models/whisper_tiny_en/requirements.txt index 75b1cf12..fa34d4f8 100644 --- a/qai_hub_models/models/whisper_tiny_en/requirements.txt +++ b/qai_hub_models/models/whisper_tiny_en/requirements.txt @@ -1,2 +1,2 @@ openai-whisper==20230314 -scipy +scipy==1.8.1 diff --git a/qai_hub_models/models/wideresnet50/README.md b/qai_hub_models/models/wideresnet50/README.md index 1fd5bb18..d212e4b8 100644 --- a/qai_hub_models/models/wideresnet50/README.md +++ b/qai_hub_models/models/wideresnet50/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of WideResNet50 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Wide Residual Networks](https://arxiv.org/abs/1605.07146) diff --git a/qai_hub_models/models/wideresnet50/evaluate.py b/qai_hub_models/models/wideresnet50/evaluate.py new file mode 100644 index 00000000..8a6a9482 --- /dev/null +++ b/qai_hub_models/models/wideresnet50/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.wideresnet50 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/wideresnet50/export.py b/qai_hub_models/models/wideresnet50/export.py index 5495b5f9..a5bd28dc 100644 --- a/qai_hub_models/models/wideresnet50/export.py +++ b/qai_hub_models/models/wideresnet50/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/wideresnet50/info.yaml b/qai_hub_models/models/wideresnet50/info.yaml index abeab0e0..59d4817f 100644 --- a/qai_hub_models/models/wideresnet50/info.yaml +++ b/qai_hub_models/models/wideresnet50/info.yaml @@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/wideresnet50/perf.yaml b/qai_hub_models/models/wideresnet50/perf.yaml index 75b89214..8a782d43 100644 --- a/qai_hub_models/models/wideresnet50/perf.yaml +++ b/qai_hub_models/models/wideresnet50/perf.yaml @@ -36,11 +36,11 @@ models: - name: WideResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 4883.0 - throughput: 204.7921359819783 + inference_time: 4868.0 + throughput: 205.42317173377157 estimated_peak_memory_range: - min: 20480 - max: 2355000 + min: 24576 + max: 2240024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jw5610nvp + job_id: jz5wm9vjg job_status: Passed torchscript_onnx_qnn: - inference_time: 5682.0 - throughput: 175.99436818021823 + inference_time: 5652.0 + throughput: 176.92852087756546 estimated_peak_memory_range: min: 622592 - max: 354632496 + max: 250014320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j1pvwnv7g + job_id: jvgd7v9lg job_status: Passed torchscript_onnx_ort: - inference_time: 5434.0 - throughput: 184.0264998159735 + inference_time: 5471.0 + throughput: 182.78194114421495 estimated_peak_memory_range: - min: 622592 - max: 484949896 + min: 20480 + max: 445804176 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jz5w9rqzp + job_id: jvgd7vleg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.594875Z' + timestamp: '2024-06-08T23:25:28Z' - torchscript_onnx_tflite: - inference_time: 3633.0 - throughput: 275.2546105147261 + inference_time: 3644.0 + throughput: 274.423710208562 estimated_peak_memory_range: min: 16384 - max: 96201536 + max: 100476704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j1p3mrexg + job_id: jmg9941vg job_status: Passed torchscript_onnx_qnn: - inference_time: 4161.0 - throughput: 240.3268445085316 + inference_time: 4212.0 + throughput: 237.41690408357076 estimated_peak_memory_range: min: 618496 - max: 54082784 + max: 53808800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j7gjl8e7p + job_id: jz5wm9n6g job_status: Passed torchscript_onnx_ort: - inference_time: 4144.0 - throughput: 241.3127413127413 + inference_time: 4064.0 + throughput: 246.06299212598427 estimated_peak_memory_range: - min: 528384 - max: 35985184 + min: 618496 + max: 31598192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jmg94qwq5 + job_id: jz57vd3l5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.594940Z' + timestamp: '2024-06-08T23:25:29Z' - torchscript_onnx_tflite: - inference_time: 4880.0 - throughput: 204.91803278688525 + inference_time: 4872.0 + throughput: 205.2545155993432 estimated_peak_memory_range: - min: 24576 - max: 2303568 + min: 20480 + max: 2441976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jwgov9345 + job_id: jnp1q8llg job_status: Passed torchscript_onnx_qnn: - inference_time: 5681.0 - throughput: 176.0253476500616 + inference_time: 5687.0 + throughput: 175.83963425356075 estimated_peak_memory_range: - min: 16384 - max: 344577184 + min: 618496 + max: 354920904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jygz70rzp + job_id: jnp1q8x2g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.594981Z' + timestamp: '2024-06-08T23:25:27Z' - torchscript_onnx_qnn: - inference_time: 5868.0 - throughput: 170.41581458759373 + inference_time: 5842.0 + throughput: 171.17425539198905 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jlpevnk75 + job_id: jmg994elg job_status: Passed torchscript_onnx_ort: - inference_time: 5093.0 - throughput: 196.34792852935402 + inference_time: 5121.0 + throughput: 195.27436047646944 estimated_peak_memory_range: - min: 49831936 - max: 49831936 + min: 71557120 + max: 71557120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 128 - job_id: jnp18mekg + job_id: jqp4jw0vp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.595027Z' + timestamp: '2024-06-08T23:25:30Z' diff --git a/qai_hub_models/models/wideresnet50_quantized/README.md b/qai_hub_models/models/wideresnet50_quantized/README.md index 5fd6c471..ee7cb919 100644 --- a/qai_hub_models/models/wideresnet50_quantized/README.md +++ b/qai_hub_models/models/wideresnet50_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of WideResNet50-Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Wide Residual Networks](https://arxiv.org/abs/1605.07146) diff --git a/qai_hub_models/models/wideresnet50_quantized/evaluate.py b/qai_hub_models/models/wideresnet50_quantized/evaluate.py new file mode 100644 index 00000000..232037a3 --- /dev/null +++ b/qai_hub_models/models/wideresnet50_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.wideresnet50_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/wideresnet50_quantized/export.py b/qai_hub_models/models/wideresnet50_quantized/export.py index 26cd34f6..a10d2988 100644 --- a/qai_hub_models/models/wideresnet50_quantized/export.py +++ b/qai_hub_models/models/wideresnet50_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/wideresnet50_quantized/info.yaml b/qai_hub_models/models/wideresnet50_quantized/info.yaml index ec14612f..d6bd4260 100644 --- a/qai_hub_models/models/wideresnet50_quantized/info.yaml +++ b/qai_hub_models/models/wideresnet50_quantized/info.yaml @@ -41,3 +41,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/wideresnet50_quantized/perf.yaml b/qai_hub_models/models/wideresnet50_quantized/perf.yaml index 4884a99a..cd023541 100644 --- a/qai_hub_models/models/wideresnet50_quantized/perf.yaml +++ b/qai_hub_models/models/wideresnet50_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: WideResNet50-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1818.0 - throughput: 550.05500550055 + inference_time: 1803.0 + throughput: 554.6311702717693 estimated_peak_memory_range: - min: 24576 - max: 2095776 + min: 12288 + max: 2605960 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jz57d8xq5 + job_id: jo5mvzyw5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2048.0 - throughput: 488.28125 + inference_time: 2049.0 + throughput: 488.0429477794046 estimated_peak_memory_range: min: 16384 - max: 250400872 + max: 124262304 primary_compute_unit: NPU 
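The generated evaluate.py above walks Model.__mro__ to pick the first ancestor that is a BaseModel but not an AIMETQuantizableMixin, so the PyTorch reference runs in FP16 while the Hub model stays quantized. A self-contained sketch of that selection logic with stand-in classes (the real BaseModel and AIMETQuantizableMixin live under qai_hub_models.utils and are not reproduced here):

# Stand-ins for the repository's BaseModel and AIMETQuantizableMixin.
class BaseModel: ...
class AIMETQuantizableMixin: ...

class ExampleModel(BaseModel): ...
class ExampleModelQuantizable(AIMETQuantizableMixin, ExampleModel): ...

def fp16_reference_cls(model_cls: type) -> type:
    # Return the first MRO entry that is a BaseModel but not AIMET-quantizable.
    for cls in model_cls.__mro__:
        if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin):
            return cls
    raise TypeError(f"{model_cls.__name__} has no non-quantized BaseModel ancestor")

assert fp16_reference_cls(ExampleModelQuantizable) is ExampleModel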
precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jopry7yvg + job_id: j2p0er765 job_status: Passed torchscript_onnx_ort: - inference_time: 2137.0 - throughput: 467.94571829667757 + inference_time: 2037.0 + throughput: 490.9180166912126 estimated_peak_memory_range: min: 12288 - max: 291422160 + max: 210986456 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 86 + layers_on_npu: 83 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 86 - job_id: jogkykyyp + total_layers: 83 + job_id: j1glekr8p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.629154Z' + timestamp: '2024-06-08T23:27:50Z' - torchscript_onnx_tflite: - inference_time: 1382.0 - throughput: 723.589001447178 + inference_time: 1386.0 + throughput: 721.5007215007215 estimated_peak_memory_range: min: 12288 - max: 54370528 + max: 56539024 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jqp4w2vqg + job_id: jegnre8r5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1538.0 - throughput: 650.1950585175553 + inference_time: 1532.0 + throughput: 652.7415143603133 estimated_peak_memory_range: - min: 167936 - max: 43727936 + min: 172032 + max: 45717904 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jep2mzmx5 + job_id: j1p8w7vxp job_status: Passed torchscript_onnx_ort: - inference_time: 1619.0 - throughput: 617.6652254478073 + inference_time: 1574.0 + throughput: 635.3240152477764 estimated_peak_memory_range: - min: 618496 - max: 30723328 + min: 12288 + max: 29772112 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 86 + layers_on_npu: 83 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 86 - job_id: jn5q2d275 + total_layers: 83 + job_id: jw56q1l0g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.629211Z' + timestamp: '2024-06-08T23:27:51Z' - torchscript_onnx_tflite: - inference_time: 1829.0 - throughput: 546.7468562055768 + inference_time: 1824.0 + throughput: 548.2456140350877 estimated_peak_memory_range: - min: 12288 - max: 2066200 + min: 24576 + max: 86925416 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: j0px1zyjg + job_id: jopr1yj9g job_status: Passed torchscript_onnx_qnn: - inference_time: 2028.0 - throughput: 493.0966469428008 + inference_time: 2034.0 + throughput: 491.6420845624385 estimated_peak_memory_range: - min: 0 - max: 218893968 + min: 12288 + max: 7539488 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: j2p0rxr2p + job_id: jn5q92o4p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.629249Z' + timestamp: '2024-06-08T23:27:49Z' - torchscript_onnx_tflite: - inference_time: 8003.0 - throughput: 124.95314257153568 + inference_time: 7862.0 + throughput: 127.1940981938438 estimated_peak_memory_range: - min: 40960 - 
max: 27300016 + min: 12288 + max: 27235632 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jo5mzl3yp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 8575.0 - throughput: 116.61807580174927 - estimated_peak_memory_range: - min: 184320 - max: 42300288 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 78 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 78 - job_id: j1p87k7z5 + job_id: jep23mn4g job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:43.629286Z' + timestamp: '2024-06-08T23:27:44Z' - torchscript_onnx_tflite: - inference_time: 23877.0 - throughput: 41.88130837207354 + inference_time: 23597.0 + throughput: 42.3782684239522 estimated_peak_memory_range: - min: 49152 - max: 1969856 + min: 53248 + max: 3084328 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jegnewevg + job_id: jqpyvd07p job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:43.629307Z' + timestamp: '2024-06-08T23:27:45Z' - torchscript_onnx_qnn: - inference_time: 1947.0 - throughput: 513.6106831022086 + inference_time: 1964.0 + throughput: 509.1649694501018 estimated_peak_memory_range: - min: 331776 - max: 331776 + min: 368640 + max: 368640 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jqpydydrp + job_id: jogkrym25 job_status: Passed torchscript_onnx_ort: - inference_time: 1934.0 - throughput: 517.063081695967 + inference_time: 1848.0 + throughput: 541.1255411255411 estimated_peak_memory_range: - min: 117841920 - max: 117841920 + min: 23400448 + max: 23400448 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 86 + layers_on_npu: 83 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 86 - job_id: jw56101vp + total_layers: 83 + job_id: j1p3qm2l5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.629346Z' + timestamp: '2024-06-08T23:27:52Z' diff --git a/qai_hub_models/models/xlsr/README.md b/qai_hub_models/models/xlsr/README.md index 1b462ab6..dc29fc0f 100644 --- a/qai_hub_models/models/xlsr/README.md +++ b/qai_hub_models/models/xlsr/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of XLSR can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Extremely Lightweight Quantization Robust Real-Time Single-Image Super Resolution for Mobile Devices](https://arxiv.org/abs/2105.10288) diff --git a/qai_hub_models/models/xlsr/demo.py b/qai_hub_models/models/xlsr/demo.py index 942a23f3..9b01472b 100644 --- a/qai_hub_models/models/xlsr/demo.py +++ b/qai_hub_models/models/xlsr/demo.py @@ -3,16 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo -from qai_hub_models.models.xlsr.model import MODEL_ASSET_VERSION, MODEL_ID, XLSR -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset - -IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "xlsr_demo.jpg" -) +from qai_hub_models.models.xlsr.model import MODEL_ID, XLSR def main(is_test: bool = False): - super_resolution_demo(XLSR, MODEL_ID, IMAGE_ADDRESS, is_test) + super_resolution_demo(XLSR, MODEL_ID, is_test=is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/xlsr/export.py b/qai_hub_models/models/xlsr/export.py index 9f2e8c9d..dfc3b401 100644 --- a/qai_hub_models/models/xlsr/export.py +++ b/qai_hub_models/models/xlsr/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/xlsr/info.yaml b/qai_hub_models/models/xlsr/info.yaml index cec3ec6d..b7ff7c32 100644 --- a/qai_hub_models/models/xlsr/info.yaml +++ b/qai_hub_models/models/xlsr/info.yaml @@ -11,13 +11,14 @@ research_paper: https://arxiv.org/abs/2105.10288 research_paper_title: Extremely Lightweight Quantization Robust Real-Time Single-Image Super Resolution for Mobile Devices license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf -deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/xlsr technical_details: - Model checkpoint: xlsr_4x_checkpoint_float32 - Input resolution: 128x128 - Number of parameters: 28.0K - Model size: 116 KB + Model checkpoint: xlsr_3x_checkpoint + Input resolution: 640x360 + Number of parameters: 22.0K + Model size: 92.7 KB applicable_scenarios: - Virtual Real Estate Tours - Gaming diff --git a/qai_hub_models/models/xlsr/model.py b/qai_hub_models/models/xlsr/model.py index 5ad0eed8..4c3e804c 100644 --- a/qai_hub_models/models/xlsr/model.py +++ b/qai_hub_models/models/xlsr/model.py @@ -4,86 +4,41 @@ # --------------------------------------------------------------------- from __future__ import annotations -import torch +from pathlib import Path -from qai_hub_models.evaluators.base_evaluators import BaseEvaluator -from qai_hub_models.evaluators.superres_evaluator 
import SuperResolutionOutputEvaluator +from qai_hub_models.models._shared.super_resolution.model import ( + DEFAULT_SCALE_FACTOR, + SuperResolutionModel, + validate_scale_factor, +) from qai_hub_models.utils.aimet.repo import aimet_zoo_as_root -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import BaseModel -from qai_hub_models.utils.input_spec import InputSpec +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_torch MODEL_ID = __name__.split(".")[-2] MODEL_ASSET_VERSION = 2 -# Weights and config stored in S3 are sourced from -# https://github.com/quic/aimet-model-zoo/blob/develop/aimet_zoo_torch/xlsr/model/model_cards/xlsr_4x_w8a8.json -# https://github.com/quic/aimet-model-zoo/releases/download/phase_2_february_artifacts/xlsr_4x_checkpoint_float32.pth.tar -XLSR_WEIGHTS = "xlsr_4x_checkpoint_float32.pth.tar" -XLSR_SOURCE_REPOSITORY = "https://github.com/quic/aimet-model-zoo" -XLSR_SOURCE_REPO_COMMIT = "d09d2b0404d10f71a7640a87e9d5e5257b028802" -SCALING_FACTOR = 4 +BASE_ASSET_URL = "https://github.com/quic/aimet-model-zoo/releases/download/phase_2_february_artifacts/xlsr_{scale_factor}x_checkpoint_float32.pth.tar" -class XLSR(BaseModel): +class XLSR(SuperResolutionModel): """Exportable XLSR super resolution model, end-to-end.""" - def __init__( - self, - xlsr_model: torch.nn.Module, - ) -> None: - super().__init__() - self.model = xlsr_model - @classmethod - def from_pretrained(cls) -> XLSR: - model = _load_xlsr_source_model() - dst = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, XLSR_WEIGHTS - ).fetch() - checkpoint = torch.load(dst, map_location=torch.device("cpu")) - model.load_state_dict(checkpoint["state_dict"]) - model.eval() - - return cls(model) - - def get_evaluator(self) -> BaseEvaluator: - return SuperResolutionOutputEvaluator() - - def forward(self, image): - """ - Run XLSR on `image`, and produce an upscaled image - - Parameters: - image: Pixel values pre-processed for model consumption. - Range: float[0, 1] - 3-channel Color Space: RGB - - Returns: - image: Pixel values - Range: float[0, 1] - 3-channel Color Space: RGB - """ - return self.model(image) - - @staticmethod - def get_input_spec( - batch_size: int = 1, - num_channels: int = 3, - height: int = 128, - width: int = 128, - ) -> InputSpec: - # Get the input specification ordered (name -> (shape, type)) pairs for this model. - # - # This can be used with the qai_hub python API to declare - # the model input specification upon submitting a profile job. - return {"image": ((batch_size, num_channels, height, width), "float32")} - - -def _load_xlsr_source_model() -> torch.nn.Module: - # Load XLSR model from the source repository using the given weights. - # Returns .utils.super_resolution.models.XLSRRelease - with aimet_zoo_as_root(): - # necessary import. `modeling.deeplab` comes from the XLSR repo. 
- from aimet_zoo_torch.common.super_resolution.models import XLSRRelease - - return XLSRRelease(scaling_factor=SCALING_FACTOR) + def from_pretrained(cls, scale_factor: int = DEFAULT_SCALE_FACTOR) -> XLSR: + validate_scale_factor(scale_factor) + with aimet_zoo_as_root(): + from aimet_zoo_torch.common.super_resolution.models import XLSRRelease + + model = XLSRRelease(scaling_factor=scale_factor) + + url = BASE_ASSET_URL.format(scale_factor=scale_factor) + checkpoint_asset = CachedWebModelAsset( + url, + MODEL_ID, + MODEL_ASSET_VERSION, + Path(url).name, + ) + checkpoint = load_torch(checkpoint_asset) + model.load_state_dict(checkpoint["state_dict"]) + model.eval() + + return cls(model, scale_factor) diff --git a/qai_hub_models/models/xlsr/perf.yaml b/qai_hub_models/models/xlsr/perf.yaml index 90b9cd40..9274c714 100644 --- a/qai_hub_models/models/xlsr/perf.yaml +++ b/qai_hub_models/models/xlsr/perf.yaml @@ -36,11 +36,11 @@ models: - name: XLSR performance_metrics: - torchscript_onnx_tflite: - inference_time: 2487.0 - throughput: 402.09087253719343 + inference_time: 2486.0 + throughput: 402.2526146419952 estimated_peak_memory_range: - min: 16384 - max: 16546256 + min: 32768 + max: 7588944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: j1pvwnw7g + job_id: j1gle1wmp job_status: Passed torchscript_onnx_qnn: - inference_time: 1371.0 - throughput: 729.3946024799417 + inference_time: 1374.0 + throughput: 727.802037845706 estimated_peak_memory_range: - min: 217088 - max: 3511184 + min: 24576 + max: 15889328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jygz707zp + job_id: jwgoe4dkp job_status: Passed torchscript_onnx_ort: - inference_time: 1549.0 - throughput: 645.577792123951 + inference_time: 1554.0 + throughput: 643.5006435006435 estimated_peak_memory_range: - min: 12288 - max: 13112960 + min: 221184 + max: 17637032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 23 - job_id: jvgdvmvkg + job_id: jygzv4zxp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.672571Z' + timestamp: '2024-06-11T11:59:35Z' - torchscript_onnx_tflite: - inference_time: 1871.0 - throughput: 534.4735435595938 + inference_time: 1792.0 + throughput: 558.0357142857143 estimated_peak_memory_range: min: 16384 - max: 20329008 + max: 20986272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: j7gjl8l7p + job_id: jw56qdoyg job_status: Passed torchscript_onnx_qnn: - inference_time: 834.0 - throughput: 1199.0407673860911 + inference_time: 840.0 + throughput: 1190.4761904761904 estimated_peak_memory_range: - min: 0 - max: 17215088 + min: 212992 + max: 20099296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jz5w9r9zp + job_id: j1pvz92rg job_status: Passed torchscript_onnx_ort: - inference_time: 995.0 - throughput: 1005.0251256281407 + inference_time: 1035.0 + throughput: 966.1835748792271 estimated_peak_memory_range: - min: 0 - max: 15534560 + min: 212992 + max: 14654368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 
+131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 23 - job_id: jz5w9r9jp + job_id: jz5wm1ymg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.672614Z' + timestamp: '2024-06-11T11:59:35Z' - torchscript_onnx_tflite: - inference_time: 2515.0 - throughput: 397.61431411530816 + inference_time: 2862.0 + throughput: 349.4060097833683 estimated_peak_memory_range: - min: 20480 - max: 7451512 + min: 28672 + max: 1426392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jlpevnv75 + job_id: j1p3qwon5 job_status: Passed torchscript_onnx_qnn: inference_time: 1370.0 throughput: 729.92700729927 estimated_peak_memory_range: - min: 28672 - max: 3445512 + min: 217088 + max: 9171344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jnp18m8kg + job_id: jlpe4l6v5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.672640Z' + timestamp: '2024-06-11T11:59:34Z' - torchscript_onnx_qnn: - inference_time: 3622.0 - throughput: 276.09055770292656 + inference_time: 3631.0 + throughput: 275.40622418066647 estimated_peak_memory_range: - min: 212992 - max: 212992 + min: 221184 + max: 221184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jmg94q4q5 + job_id: j7gjkw3e5 job_status: Passed torchscript_onnx_ort: - inference_time: 1535.0 - throughput: 651.4657980456026 + inference_time: 1489.0 + throughput: 671.591672263264 estimated_peak_memory_range: - min: 8708096 - max: 8708096 + min: 8957952 + max: 8957952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 23 - job_id: jmg94q4v5 + job_id: jnp1qvo7g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.672667Z' + timestamp: '2024-06-11T11:59:36Z' diff --git a/qai_hub_models/models/xlsr/test.py b/qai_hub_models/models/xlsr/test.py index 1ce0cdd8..03726f7e 100644 --- a/qai_hub_models/models/xlsr/test.py +++ b/qai_hub_models/models/xlsr/test.py @@ -5,7 +5,7 @@ import numpy as np from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models.xlsr.demo import IMAGE_ADDRESS +from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS from qai_hub_models.models.xlsr.demo import main as demo_main from qai_hub_models.models.xlsr.model import MODEL_ASSET_VERSION, MODEL_ID, XLSR from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image diff --git a/qai_hub_models/models/xlsr_quantized/README.md b/qai_hub_models/models/xlsr_quantized/README.md index bd67774d..483777d1 100644 --- a/qai_hub_models/models/xlsr_quantized/README.md +++ b/qai_hub_models/models/xlsr_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. 
## License - The license for the original implementation of XLSR-Quantized can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Extremely Lightweight Quantization Robust Real-Time Single-Image Super Resolution for Mobile Devices](https://arxiv.org/abs/2105.10288) diff --git a/qai_hub_models/models/xlsr_quantized/demo.py b/qai_hub_models/models/xlsr_quantized/demo.py index af51277d..3f5096e5 100644 --- a/qai_hub_models/models/xlsr_quantized/demo.py +++ b/qai_hub_models/models/xlsr_quantized/demo.py @@ -3,26 +3,14 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo -from qai_hub_models.models.xlsr_quantized.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, - XLSRQuantizable, -) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import TargetRuntime - -IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "xlsr_quantized_demo.jpg" -) +from qai_hub_models.models.xlsr_quantized.model import MODEL_ID, XLSRQuantizable def main(is_test: bool = False): super_resolution_demo( XLSRQuantizable, MODEL_ID, - IMAGE_ADDRESS, - is_test, - available_target_runtimes=[TargetRuntime.TFLITE], + is_test=is_test, ) diff --git a/qai_hub_models/models/xlsr_quantized/export.py b/qai_hub_models/models/xlsr_quantized/export.py index 45d6057b..a0ddab0f 100644 --- a/qai_hub_models/models/xlsr_quantized/export.py +++ b/qai_hub_models/models/xlsr_quantized/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/xlsr_quantized/info.yaml b/qai_hub_models/models/xlsr_quantized/info.yaml index 38920617..cadc40fc 100644 --- a/qai_hub_models/models/xlsr_quantized/info.yaml +++ b/qai_hub_models/models/xlsr_quantized/info.yaml @@ -12,13 +12,14 @@ research_paper: https://arxiv.org/abs/2105.10288 research_paper_title: Extremely Lightweight Quantization Robust Real-Time Single-Image Super Resolution for Mobile Devices license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf -deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/xlsr technical_details: - Model checkpoint: xlsr_4x_checkpoint_w8a8 - Input resolution: 128x128 - Number of parameters: 28.0K - Model size: 47.0 KB + Model checkpoint: xlsr_3x_checkpoint + Input resolution: 640x360 + Number of parameters: 22.0K + Model size: 39.0 KB applicable_scenarios: - Virtual Real Estate Tours - Gaming diff --git a/qai_hub_models/models/xlsr_quantized/model.py 
b/qai_hub_models/models/xlsr_quantized/model.py index 7ff4cd2c..c4115c75 100644 --- a/qai_hub_models/models/xlsr_quantized/model.py +++ b/qai_hub_models/models/xlsr_quantized/model.py @@ -12,12 +12,12 @@ ) # isort: on - import torch from aimet_torch.cross_layer_equalization import equalize_model from aimet_torch.model_preparer import prepare_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim +from qai_hub_models.models._shared.super_resolution.model import DEFAULT_SCALE_FACTOR from qai_hub_models.models.xlsr.model import XLSR from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset @@ -25,7 +25,6 @@ MODEL_ID = __name__.split(".")[-2] MODEL_ASSET_VERSION = 3 DEFAULT_ENCODINGS = "xlsr_quantized_encodings.json" -SCALING_FACTOR = 4 class XLSRQuantizable(AIMETQuantizableMixin, XLSR): @@ -37,14 +36,16 @@ class XLSRQuantizable(AIMETQuantizableMixin, XLSR): def __init__( self, xlsr_model: QuantizationSimModel, + scale_factor: int, ) -> None: - XLSR.__init__(self, xlsr_model.model) + XLSR.__init__(self, xlsr_model.model, scale_factor) AIMETQuantizableMixin.__init__(self, xlsr_model) @classmethod def from_pretrained( cls, aimet_encodings: str | None = "DEFAULT", + scale_factor: int = DEFAULT_SCALE_FACTOR, ) -> XLSRQuantizable: """ Parameters: @@ -53,7 +54,7 @@ def from_pretrained( elif None: Doesn't load any encodings. Used when computing encodings. else: Interprets as a filepath and loads the encodings stored there. """ - fp16_model = XLSR.from_pretrained() + fp16_model = XLSR.from_pretrained(scale_factor) input_shape = cls.get_input_spec()["image"][0] model = prepare_model(fp16_model) @@ -76,4 +77,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - return cls(sim) + return cls(sim, scale_factor) diff --git a/qai_hub_models/models/xlsr_quantized/perf.yaml b/qai_hub_models/models/xlsr_quantized/perf.yaml index 1676715b..c06896f0 100644 --- a/qai_hub_models/models/xlsr_quantized/perf.yaml +++ b/qai_hub_models/models/xlsr_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: XLSR-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1130.0 - throughput: 884.9557522123894 + inference_time: 1141.0 + throughput: 876.4241893076249 estimated_peak_memory_range: - min: 20480 - max: 1508352 + min: 28672 + max: 5356448 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jqp4w2xlg + job_id: jmg99xomg job_status: Passed torchscript_onnx_qnn: - inference_time: 794.0 - throughput: 1259.4458438287154 + inference_time: 799.0 + throughput: 1251.5644555694619 estimated_peak_memory_range: - min: 12288 - max: 10075288 + min: 16384 + max: 12173096 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,14 +69,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jep2mz7m5 + job_id: j0pxed085 job_status: Passed torchscript_onnx_ort: - inference_time: 1260.0 - throughput: 793.6507936507936 + inference_time: 769.0 + throughput: 1300.3901170351105 estimated_peak_memory_range: - min: 212992 - max: 10412440 + min: 12288 + max: 3749080 primary_compute_unit: NPU precision: int8 layer_info: @@ -84,7 +84,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jn5q2d7m5 + job_id: jep23vo6g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung 
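The xlsr/model.py and xlsr_quantized/model.py hunks above replace the hard-coded 4x checkpoint with a scale_factor argument that is validated and substituted into BASE_ASSET_URL, and XLSRQuantizable.from_pretrained now forwards that same scale_factor. A small sketch of how the checkpoint URL resolves; the scale factors shown are illustrative, since the set accepted by validate_scale_factor is not visible in this patch:

# URL template copied from the xlsr/model.py hunk above (sketch only).
BASE_ASSET_URL = (
    "https://github.com/quic/aimet-model-zoo/releases/download/"
    "phase_2_february_artifacts/xlsr_{scale_factor}x_checkpoint_float32.pth.tar"
)

for scale_factor in (3, 4):  # illustrative values; 4 reproduces the previously hard-coded asset name
    print(BASE_ASSET_URL.format(scale_factor=scale_factor))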
chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.706754Z' + timestamp: '2024-06-11T12:00:03Z' - torchscript_onnx_tflite: - inference_time: 945.0 - throughput: 1058.2010582010582 + inference_time: 943.0 + throughput: 1060.4453870625662 estimated_peak_memory_range: - min: 12288 - max: 20770656 + min: 16384 + max: 21882800 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: j0px1z79g + job_id: jnp1qvong job_status: Passed torchscript_onnx_qnn: - inference_time: 545.0 - throughput: 1834.8623853211009 + inference_time: 546.0 + throughput: 1831.5018315018315 estimated_peak_memory_range: - min: 61440 - max: 18351280 + min: 65536 + max: 19116992 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,14 +122,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jqpydy44p + job_id: jo5mvd975 job_status: Passed torchscript_onnx_ort: - inference_time: 850.0 - throughput: 1176.4705882352941 + inference_time: 552.0 + throughput: 1811.5942028985507 estimated_peak_memory_range: - min: 212992 - max: 13825792 + min: 61440 + max: 18287376 primary_compute_unit: NPU precision: int8 layer_info: @@ -137,7 +137,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: j1glkq0lp + job_id: jqpyv780p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.706797Z' + timestamp: '2024-06-11T12:00:04Z' - torchscript_onnx_tflite: - inference_time: 1131.0 - throughput: 884.1732979664014 + inference_time: 1145.0 + throughput: 873.3624454148471 estimated_peak_memory_range: - min: 12288 - max: 1681000 + min: 106496 + max: 1718744 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jo5mzlwqp + job_id: jvgd7z66g job_status: Passed torchscript_onnx_qnn: - inference_time: 793.0 - throughput: 1261.034047919294 + inference_time: 807.0 + throughput: 1239.1573729863692 estimated_peak_memory_range: - min: 20480 - max: 15313544 + min: 16384 + max: 17351048 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j1p87k385 + job_id: jopr1nxkg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.706823Z' + timestamp: '2024-06-11T12:00:02Z' - torchscript_onnx_tflite: - inference_time: 3650.0 - throughput: 273.972602739726 + inference_time: 2637.0 + throughput: 379.21880925293897 estimated_peak_memory_range: min: 12288 - max: 15804208 + max: 14920896 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jegnew9mg - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1961.0 - throughput: 509.94390617032127 - estimated_peak_memory_range: - min: 61440 - max: 18977360 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 17 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 17 - job_id: jogkyklop + job_id: jz57v7on5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:43.706849Z' + timestamp: 
'2024-06-11T11:59:56Z' - torchscript_onnx_tflite: - inference_time: 14496.0 - throughput: 68.98454746136865 + inference_time: 11523.0 + throughput: 86.78295582747549 estimated_peak_memory_range: - min: 6959104 - max: 13312008 + min: 2777088 + max: 8508512 primary_compute_unit: GPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 9 layers_on_cpu: 5 total_layers: 17 - job_id: jopry74eg + job_id: jqp4j9e2p job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:43.706864Z' + timestamp: '2024-06-11T11:59:56Z' - torchscript_onnx_qnn: - inference_time: 951.0 - throughput: 1051.5247108307046 + inference_time: 960.0 + throughput: 1041.6666666666667 estimated_peak_memory_range: - min: 53248 - max: 53248 + min: 57344 + max: 57344 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,14 +244,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j2p0rx1ep + job_id: jegnr71j5 job_status: Passed torchscript_onnx_ort: - inference_time: 1145.0 - throughput: 873.3624454148471 + inference_time: 750.0 + throughput: 1333.3333333333333 estimated_peak_memory_range: - min: 8810496 - max: 8810496 + min: 7811072 + max: 7811072 primary_compute_unit: NPU precision: int8 layer_info: @@ -274,7 +259,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jw561037p + job_id: j2p0ev905 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.706892Z' + timestamp: '2024-06-11T12:00:04Z' diff --git a/qai_hub_models/models/xlsr_quantized/test.py b/qai_hub_models/models/xlsr_quantized/test.py index 7ec905dc..609cda51 100644 --- a/qai_hub_models/models/xlsr_quantized/test.py +++ b/qai_hub_models/models/xlsr_quantized/test.py @@ -6,19 +6,15 @@ import torch from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models.xlsr_quantized.demo import IMAGE_ADDRESS +from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS +from qai_hub_models.models.xlsr.model import MODEL_ASSET_VERSION, MODEL_ID from qai_hub_models.models.xlsr_quantized.demo import main as demo_main -from qai_hub_models.models.xlsr_quantized.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, - XLSRQuantizable, -) +from qai_hub_models.models.xlsr_quantized.model import XLSRQuantizable from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image -from qai_hub_models.utils.testing import skip_clone_repo_check +from qai_hub_models.utils.testing import assert_most_close, skip_clone_repo_check -OUTPUT_IMAGE_LOCAL_PATH = "xlsr_quantized_demo_output.png" OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, OUTPUT_IMAGE_LOCAL_PATH + MODEL_ID, MODEL_ASSET_VERSION, "xlsr_demo_output.png" ) @@ -32,9 +28,10 @@ def test_task(): app = SuperResolutionApp(model=model) app_output_image = app.upscale_image(image)[0] - np.testing.assert_allclose( + assert_most_close( np.asarray(app_output_image, dtype=np.float32) / 255, np.asarray(output_image, dtype=np.float32) / 255, + diff_tol=1e-4, rtol=0.02, atol=0.2, ) diff --git a/qai_hub_models/models/yolonas/README.md b/qai_hub_models/models/yolonas/README.md index 15cb8fb0..d6f5aca9 100644 --- a/qai_hub_models/models/yolonas/README.md +++ b/qai_hub_models/models/yolonas/README.md @@ -48,7 
+48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Yolo-NAS can be found [here](https://github.com/Deci-AI/super-gradients/blob/master/LICENSE.YOLONAS.md). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [YOLO-NAS by Deci Achieves SOTA Performance on Object Detection Using Neural Architecture Search](https://deci.ai/blog/yolo-nas-object-detection-foundation-model/) diff --git a/qai_hub_models/models/yolonas/export.py b/qai_hub_models/models/yolonas/export.py index 15a8dcc5..3edacba9 100644 --- a/qai_hub_models/models/yolonas/export.py +++ b/qai_hub_models/models/yolonas/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/yolonas/info.yaml b/qai_hub_models/models/yolonas/info.yaml index b2b6b9e2..a6b7b84b 100644 --- a/qai_hub_models/models/yolonas/info.yaml +++ b/qai_hub_models/models/yolonas/info.yaml @@ -38,3 +38,4 @@ license_type: apache-2.0 deploy_license_type: AI Model Hub License dataset: - COCO +labels_file: coco_labels.txt diff --git a/qai_hub_models/models/yolonas/perf.yaml b/qai_hub_models/models/yolonas/perf.yaml index c9303e01..4798067d 100644 --- a/qai_hub_models/models/yolonas/perf.yaml +++ b/qai_hub_models/models/yolonas/perf.yaml @@ -36,11 +36,11 @@ models: - name: Yolo-NAS performance_metrics: - torchscript_onnx_tflite: - inference_time: 11709.0 - throughput: 85.40438978563498 + inference_time: 12935.0 + throughput: 77.30962504831851 estimated_peak_memory_range: - min: 53248 - max: 7527368 + min: 245760 + max: 7789312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jwgov91d5 + job_id: jmg993llg job_status: Passed torchscript_onnx_qnn: - inference_time: 14818.0 - throughput: 67.48549061951681 + inference_time: 14574.0 + throughput: 68.61534239055852 estimated_peak_memory_range: - min: 4919296 - max: 24147176 + min: 5861376 + max: 20985784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jlpevnr05 + job_id: jz57vjyl5 job_status: Passed torchscript_onnx_ort: - inference_time: 9857.0 - throughput: 101.45074566298062 + inference_time: 9949.0 + throughput: 100.51261433309881 estimated_peak_memory_range: - min: 438272 - max: 60081736 + min: 540672 + max: 61160336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jnp18mdlg + job_id: jegnr96r5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.750765Z' + timestamp: '2024-06-08T23:29:29Z' - torchscript_onnx_tflite: - inference_time: 8112.0 - throughput: 123.2741617357002 + inference_time: 9036.0 + throughput: 110.66843736166446 
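The info.yaml hunk above adds a labels_file entry (coco_labels.txt) alongside the dataset metadata; the label files themselves are plain text with one class name per line. A generic sketch of reading such a file; the path and helper name are hypothetical, and the repository's own loading helper is not shown in this patch:

# Generic sketch: load a one-class-per-line labels file into a list.
from pathlib import Path

def load_labels(path: str) -> list[str]:
    return [line.strip() for line in Path(path).read_text().splitlines() if line.strip()]

labels = load_labels("coco_labels.txt")  # hypothetical local path to the labels file
print(len(labels), labels[:3])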
estimated_peak_memory_range: - min: 241664 - max: 95426800 + min: 217088 + max: 99001056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: j1pvwn1mg + job_id: jnp1qd42g job_status: Passed torchscript_onnx_qnn: - inference_time: 10117.0 - throughput: 98.84353069091628 + inference_time: 10109.0 + throughput: 98.92175289346127 estimated_peak_memory_range: - min: 4947968 - max: 97898304 + min: 4931584 + max: 92525504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jygz70x6p + job_id: jqp4jxlvp job_status: Passed torchscript_onnx_ort: - inference_time: 6782.0 - throughput: 147.4491300501327 + inference_time: 6486.0 + throughput: 154.17823003391922 estimated_peak_memory_range: - min: 7028736 - max: 53864960 + min: 4931584 + max: 56975920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jvgdvmrlg + job_id: jopr14v9g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.750867Z' + timestamp: '2024-06-08T23:29:30Z' - torchscript_onnx_tflite: - inference_time: 11707.0 - throughput: 85.41898009737764 + inference_time: 12949.0 + throughput: 77.22604062089736 estimated_peak_memory_range: - min: 266240 - max: 4871408 + min: 225280 + max: 7472208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: j7gjl808p + job_id: jvgd7rxeg job_status: Passed torchscript_onnx_qnn: - inference_time: 15053.0 - throughput: 66.43194047698134 + inference_time: 15243.0 + throughput: 65.603883749918 estimated_peak_memory_range: - min: 4960256 - max: 20004872 + min: 4952064 + max: 23136736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jmg94q3v5 + job_id: jo5mvwnw5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.750934Z' + timestamp: '2024-06-08T23:29:28Z' - torchscript_onnx_qnn: - inference_time: 11914.0 - throughput: 83.9348665435622 + inference_time: 11897.0 + throughput: 84.05480373203329 estimated_peak_memory_range: - min: 4923392 - max: 4923392 + min: 4808704 + max: 4808704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jz5w9rdjp + job_id: j0pxe7k15 job_status: Passed torchscript_onnx_ort: - inference_time: 10145.0 - throughput: 98.57072449482504 + inference_time: 10119.0 + throughput: 98.82399446585632 estimated_peak_memory_range: - min: 14712832 - max: 14712832 + min: 5672960 + max: 5672960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jz57d8vr5 + job_id: jep237k4g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.751010Z' + timestamp: '2024-06-08T23:29:31Z' diff --git a/qai_hub_models/models/yolonas/requirements.txt 
b/qai_hub_models/models/yolonas/requirements.txt index b6f0ec66..2466ff49 100644 --- a/qai_hub_models/models/yolonas/requirements.txt +++ b/qai_hub_models/models/yolonas/requirements.txt @@ -7,3 +7,5 @@ einops==0.3.2 Deprecated==1.2.11 data-gradients==0.3.1 shapely==2.0.3 +boto3==1.34.119 +torchmetrics==1.4.0.post0 diff --git a/qai_hub_models/models/yolonas_quantized/README.md b/qai_hub_models/models/yolonas_quantized/README.md index eed10e62..61c64bb3 100644 --- a/qai_hub_models/models/yolonas_quantized/README.md +++ b/qai_hub_models/models/yolonas_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Yolo-NAS-Quantized can be found [here](https://github.com/Deci-AI/super-gradients/blob/master/LICENSE.YOLONAS.md). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [YOLO-NAS by Deci Achieves SOTA Performance on Object Detection Using Neural Architecture Search](https://deci.ai/blog/yolo-nas-object-detection-foundation-model/) diff --git a/qai_hub_models/models/yolonas_quantized/export.py b/qai_hub_models/models/yolonas_quantized/export.py index eadf61e9..86a7b17f 100644 --- a/qai_hub_models/models/yolonas_quantized/export.py +++ b/qai_hub_models/models/yolonas_quantized/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -225,7 +225,12 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) + parser = export_parser( + model_cls=Model, + supports_qnn=False, + supports_ort=False, + supports_precompiled_ort=False, + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolonas_quantized/info.yaml b/qai_hub_models/models/yolonas_quantized/info.yaml index d3a0e9d4..3c91dd39 100644 --- a/qai_hub_models/models/yolonas_quantized/info.yaml +++ b/qai_hub_models/models/yolonas_quantized/info.yaml @@ -40,3 +40,4 @@ license_type: apache-2.0 deploy_license_type: AI Model Hub License dataset: - COCO +labels_file: coco_labels.txt diff --git a/qai_hub_models/models/yolonas_quantized/perf.yaml b/qai_hub_models/models/yolonas_quantized/perf.yaml index 64592233..00f23b93 100644 --- a/qai_hub_models/models/yolonas_quantized/perf.yaml +++ b/qai_hub_models/models/yolonas_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: Yolo-NAS-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 6971.0 - throughput: 143.45144168698894 + inference_time: 6973.0 + throughput: 143.41029685931449 estimated_peak_memory_range: - min: 9261056 - max: 12251360 + min: 10432512 + max: 13902448 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,7 +54,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 203 - job_id: j0px1ze9g + job_id: j2p0e1z65 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -63,13 +63,13 @@ models: os_name: Android manufacturer: Samsung chipset: 
Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.785555Z' + timestamp: '2024-06-08T23:30:46Z' - torchscript_onnx_tflite: - inference_time: 5192.0 - throughput: 192.6040061633282 + inference_time: 5003.0 + throughput: 199.8800719568259 estimated_peak_memory_range: - min: 913408 - max: 62958576 + min: 356352 + max: 64309792 primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +77,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 203 - job_id: jo5mzlvqp + job_id: j1p8w3qxp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -86,13 +86,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.785595Z' + timestamp: '2024-06-08T23:30:47Z' - torchscript_onnx_tflite: - inference_time: 6975.0 - throughput: 143.36917562724014 + inference_time: 6937.0 + throughput: 144.15453366008362 estimated_peak_memory_range: - min: 10153984 - max: 13662784 + min: 10485760 + max: 42442768 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,7 +100,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 203 - job_id: jegnewrmg + job_id: jogkrle25 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -109,13 +109,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.785630Z' + timestamp: '2024-06-08T23:30:48Z' - torchscript_onnx_tflite: - inference_time: 22146.0 - throughput: 45.154881242662334 + inference_time: 23899.0 + throughput: 41.84275492698439 estimated_peak_memory_range: - min: 200704 - max: 52808240 + min: 765952 + max: 56795680 primary_compute_unit: NPU precision: int8 layer_info: @@ -123,7 +123,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 203 - job_id: jopry71eg + job_id: jn5q9764p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -132,13 +132,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:43.785665Z' + timestamp: '2024-06-08T23:30:49Z' - torchscript_onnx_tflite: - inference_time: 115607.0 - throughput: 8.649995242502616 + inference_time: 131373.0 + throughput: 7.611914168055841 estimated_peak_memory_range: - min: 44011520 - max: 52977680 + min: 15310848 + max: 24594432 primary_compute_unit: CPU precision: fp32 layer_info: @@ -146,7 +146,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 203 total_layers: 203 - job_id: jep2mz3m5 + job_id: j1gle0v8p job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -155,12 +155,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:43.785698Z' - - reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows - manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.785705Z' + timestamp: '2024-06-08T23:30:50Z' diff --git a/qai_hub_models/models/yolonas_quantized/requirements.txt b/qai_hub_models/models/yolonas_quantized/requirements.txt index 930698bd..4904c5ac 100644 --- a/qai_hub_models/models/yolonas_quantized/requirements.txt +++ b/qai_hub_models/models/yolonas_quantized/requirements.txt @@ -8,3 +8,5 @@ einops==0.3.2 Deprecated==1.2.11 data-gradients==0.3.1 shapely==2.0.3 +boto3==1.34.119 +torchmetrics==1.4.0.post0 diff --git a/qai_hub_models/models/yolov6/README.md b/qai_hub_models/models/yolov6/README.md index d3d4f458..97f127f0 100644 --- a/qai_hub_models/models/yolov6/README.md +++ b/qai_hub_models/models/yolov6/README.md @@ -43,7 +43,7 @@ script requires access to 
Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Yolo-v6 can be found [here](https://github.com/meituan/YOLOv6/blob/47625514e7480706a46ff3c0cd0252907ac12f22/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/meituan/YOLOv6/blob/47625514e7480706a46ff3c0cd0252907ac12f22/LICENSE) ## References * [YOLOv6: A Single-Stage Object Detection Framework for Industrial Applications](https://arxiv.org/abs/2209.02976) diff --git a/qai_hub_models/models/yolov6/export.py b/qai_hub_models/models/yolov6/export.py index 9895e986..1b9b17c2 100644 --- a/qai_hub_models/models/yolov6/export.py +++ b/qai_hub_models/models/yolov6/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/yolov6/info.yaml b/qai_hub_models/models/yolov6/info.yaml index 419842fd..e9dda573 100644 --- a/qai_hub_models/models/yolov6/info.yaml +++ b/qai_hub_models/models/yolov6/info.yaml @@ -39,3 +39,4 @@ has_animated_banner: yes license_type: gpl-3.0 deploy_license_type: gpl-3.0 dataset: [] +labels_file: coco_labels.txt diff --git a/qai_hub_models/models/yolov6/perf.yaml b/qai_hub_models/models/yolov6/perf.yaml index 6a28fd5e..896ae16b 100644 --- a/qai_hub_models/models/yolov6/perf.yaml +++ b/qai_hub_models/models/yolov6/perf.yaml @@ -36,11 +36,11 @@ models: - name: Yolo-v6 performance_metrics: - torchscript_onnx_tflite: - inference_time: 6721.0 - throughput: 148.78738282993604 + inference_time: 7424.0 + throughput: 134.69827586206895 estimated_peak_memory_range: min: 12288 - max: 3249784 + max: 3603960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jwgov9ed5 + job_id: jz5wmdw6g job_status: Passed torchscript_onnx_qnn: - inference_time: 5377.0 - throughput: 185.97731076808628 + inference_time: 5369.0 + throughput: 186.25442354255912 estimated_peak_memory_range: - min: 4210688 - max: 16028088 + min: 4968448 + max: 16471240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jlpevn405 + job_id: jvgd7rneg job_status: Passed torchscript_onnx_ort: - inference_time: 6458.0 - throughput: 154.8467017652524 + inference_time: 7761.0 + throughput: 128.84937508053085 estimated_peak_memory_range: - min: 5177344 - max: 34730048 + min: 5341184 + max: 35743744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jnp18mqlg + job_id: jo5mvwew5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.810603Z' + timestamp: '2024-06-08T23:31:24Z' - torchscript_onnx_tflite: - inference_time: 4753.0 - throughput: 210.39343572480539 + inference_time: 5294.0 + throughput: 188.89308651303364 estimated_peak_memory_range: - min: 20480 - max: 78753056 + min: 40960 + max: 79662544 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: j1pvwnzmg + job_id: jmg9930lg job_status: Passed torchscript_onnx_qnn: - inference_time: 3870.0 - throughput: 258.3979328165375 + inference_time: 3862.0 + throughput: 258.9331952356292 estimated_peak_memory_range: min: 4931584 - max: 92796272 + max: 95031952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jygz70v6p + job_id: jz57vj2l5 job_status: Passed torchscript_onnx_ort: - inference_time: 4911.0 - throughput: 203.62451639177357 + inference_time: 5600.0 + throughput: 178.57142857142858 estimated_peak_memory_range: - min: 4931584 - max: 65416976 + min: 835584 + max: 60500960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jvgdvm7lg + job_id: jegnr90r5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.810694Z' + timestamp: '2024-06-08T23:31:25Z' - torchscript_onnx_tflite: - inference_time: 6718.0 - throughput: 148.85382554331647 + inference_time: 7339.0 + throughput: 136.2583458236817 estimated_peak_memory_range: - min: 229376 - max: 21141768 + min: 45056 + max: 9009312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: j7gjl8k8p + job_id: jnp1qd22g job_status: Passed torchscript_onnx_qnn: - inference_time: 5351.0 - throughput: 186.88095683049897 + inference_time: 5384.0 + throughput: 185.73551263001485 estimated_peak_memory_range: - min: 4956160 - max: 16811712 + min: 4939776 + max: 16906872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jmg94q9v5 + job_id: j0pxe7915 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.810752Z' + timestamp: '2024-06-08T23:31:23Z' - torchscript_onnx_qnn: - inference_time: 6760.0 - throughput: 147.92899408284023 + inference_time: 6812.0 + throughput: 146.7997651203758 estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jz5w9rmjp + job_id: jqp4jxnvp job_status: Passed torchscript_onnx_ort: - inference_time: 6592.0 - throughput: 151.6990291262136 + inference_time: 6530.0 + throughput: 153.1393568147014 estimated_peak_memory_range: - min: 10207232 - max: 10207232 + min: 3538944 + max: 3538944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jz57d86r5 + job_id: jopr1469g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.810815Z' + timestamp: '2024-06-08T23:31:26Z' diff --git a/qai_hub_models/models/yolov7/README.md b/qai_hub_models/models/yolov7/README.md index e6ab3b03..d6bbd49d 100644 --- a/qai_hub_models/models/yolov7/README.md +++ b/qai_hub_models/models/yolov7/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for 
Qualcomm® AI Hub. ## License - The license for the original implementation of Yolo-v7 can be found [here](https://github.com/WongKinYiu/yolov7/blob/main/LICENSE.md). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/WongKinYiu/yolov7/blob/main/LICENSE.md) ## References * [YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors](https://arxiv.org/abs/2207.02696) diff --git a/qai_hub_models/models/yolov7/export.py b/qai_hub_models/models/yolov7/export.py index a794246c..ecc0f421 100644 --- a/qai_hub_models/models/yolov7/export.py +++ b/qai_hub_models/models/yolov7/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -217,7 +217,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov7/info.yaml b/qai_hub_models/models/yolov7/info.yaml index 88298456..1e7dcaeb 100644 --- a/qai_hub_models/models/yolov7/info.yaml +++ b/qai_hub_models/models/yolov7/info.yaml @@ -37,3 +37,4 @@ has_animated_banner: yes license_type: gpl-3.0 deploy_license_type: gpl-3.0 dataset: [] +labels_file: coco_labels.txt diff --git a/qai_hub_models/models/yolov7/perf.yaml b/qai_hub_models/models/yolov7/perf.yaml index 3ea222f6..4a8b5cd0 100644 --- a/qai_hub_models/models/yolov7/perf.yaml +++ b/qai_hub_models/models/yolov7/perf.yaml @@ -36,11 +36,11 @@ models: - name: Yolo-v7 performance_metrics: - torchscript_onnx_tflite: - inference_time: 15910.0 - throughput: 62.853551225644246 + inference_time: 15912.0 + throughput: 62.845651080945196 estimated_peak_memory_range: - min: 659456 - max: 2744520 + min: 36864 + max: 24453640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 12 total_layers: 215 - job_id: j0px1zm9g + job_id: jqpyv4z7p job_status: Passed torchscript_onnx_ort: - inference_time: 13428.0 - throughput: 74.47125409591898 + inference_time: 13978.0 + throughput: 71.5409929889827 estimated_peak_memory_range: - min: 2269184 - max: 36609320 + min: 1499136 + max: 35988136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 12 total_layers: 225 - job_id: j2p0rxkep + job_id: jw56q320g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.845281Z' + timestamp: '2024-06-08T23:31:53Z' - torchscript_onnx_tflite: - inference_time: 10854.0 - throughput: 92.13193292795283 + inference_time: 10805.0 + throughput: 92.5497454881999 estimated_peak_memory_range: - min: 32768 - max: 59666608 + min: 1200128 + max: 65074384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 12 total_layers: 215 - job_id: jo5mzl4qp + job_id: j2p0e1465 job_status: Passed 
torchscript_onnx_ort: - inference_time: 9618.0 - throughput: 103.97171969224371 + inference_time: 8800.0 + throughput: 113.63636363636364 estimated_peak_memory_range: - min: 7049216 - max: 68347616 + min: 7557120 + max: 68407936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 12 total_layers: 225 - job_id: j1p87k885 + job_id: j1p3q4nl5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.845348Z' + timestamp: '2024-06-08T23:31:54Z' - torchscript_onnx_tflite: - inference_time: 15980.0 - throughput: 62.57822277847309 + inference_time: 15993.0 + throughput: 62.52735571812668 estimated_peak_memory_range: - min: 1228800 - max: 3705800 + min: 1232896 + max: 3455120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 12 total_layers: 215 - job_id: jegnewxmg + job_id: j1p8w32xp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.845383Z' + timestamp: '2024-06-08T23:31:48Z' - torchscript_onnx_ort: - inference_time: 13426.0 - throughput: 74.48234768359899 + inference_time: 13386.0 + throughput: 74.70491558344538 estimated_peak_memory_range: - min: 4927488 - max: 4927488 + min: 4964352 + max: 4964352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 12 total_layers: 225 - job_id: jogkykdop + job_id: jwgoe1zxp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.845418Z' + timestamp: '2024-06-08T23:31:55Z' diff --git a/qai_hub_models/models/yolov7_quantized/README.md b/qai_hub_models/models/yolov7_quantized/README.md index 390b486d..2535d8d0 100644 --- a/qai_hub_models/models/yolov7_quantized/README.md +++ b/qai_hub_models/models/yolov7_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Yolo-v7-Quantized can be found [here](https://github.com/WongKinYiu/yolov7/blob/main/LICENSE.md). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/WongKinYiu/yolov7/blob/main/LICENSE.md) ## References * [YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors](https://arxiv.org/abs/2207.02696) diff --git a/qai_hub_models/models/yolov7_quantized/export.py b/qai_hub_models/models/yolov7_quantized/export.py index 97078f0f..a8d2b1bc 100644 --- a/qai_hub_models/models/yolov7_quantized/export.py +++ b/qai_hub_models/models/yolov7_quantized/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -225,7 +225,12 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) + parser = export_parser( + model_cls=Model, + supports_qnn=False, + supports_ort=False, + supports_precompiled_ort=False, + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov7_quantized/info.yaml b/qai_hub_models/models/yolov7_quantized/info.yaml index 9ce2d281..9799b03c 100644 --- a/qai_hub_models/models/yolov7_quantized/info.yaml +++ b/qai_hub_models/models/yolov7_quantized/info.yaml @@ -40,3 +40,4 @@ has_animated_banner: yes license_type: gpl-3.0 deploy_license_type: gpl-3.0 dataset: [] +labels_file: coco_labels.txt diff --git a/qai_hub_models/models/yolov7_quantized/perf.yaml b/qai_hub_models/models/yolov7_quantized/perf.yaml index 34341b33..765fdc6d 100644 --- a/qai_hub_models/models/yolov7_quantized/perf.yaml +++ b/qai_hub_models/models/yolov7_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: Yolo-v7-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 4610.0 - throughput: 216.91973969631238 + inference_time: 4596.0 + throughput: 217.58050478677112 estimated_peak_memory_range: - min: 319488 - max: 4163328 + min: 311296 + max: 2244624 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,7 +54,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 226 - job_id: jw5610v7p + job_id: j7gjk0dx5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -63,13 +63,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.871316Z' + timestamp: '2024-06-08T23:32:25Z' - torchscript_onnx_tflite: - inference_time: 2954.0 - throughput: 338.52403520649966 + inference_time: 2999.0 + throughput: 333.4444814938313 estimated_peak_memory_range: - min: 12288 - max: 59764048 + min: 32768 + max: 61022912 primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +77,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 226 - job_id: jwgov9md5 + job_id: jlpe4ro15 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -86,13 +86,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.871356Z' + timestamp: '2024-06-08T23:32:26Z' - torchscript_onnx_tflite: - inference_time: 4573.0 - throughput: 218.67483052700635 + inference_time: 4588.0 + throughput: 217.9598953792502 estimated_peak_memory_range: - min: 278528 - max: 2823912 + 
min: 299008 + max: 3108488 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,7 +100,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 226 - job_id: j1pvwn4mg + job_id: jygzvx2kp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -109,13 +109,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.871392Z' + timestamp: '2024-06-08T23:32:27Z' - torchscript_onnx_tflite: - inference_time: 10865.0 - throughput: 92.03865623561896 + inference_time: 10699.0 + throughput: 93.46667912889055 estimated_peak_memory_range: min: 266240 - max: 55222224 + max: 56452384 primary_compute_unit: NPU precision: int8 layer_info: @@ -123,7 +123,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 226 - job_id: jlpevn205 + job_id: jz5wmd26g job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -132,13 +132,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:43.871427Z' + timestamp: '2024-06-08T23:32:29Z' - torchscript_onnx_tflite: - inference_time: 92308.0 - throughput: 10.833297222342592 + inference_time: 93320.0 + throughput: 10.715816545220745 estimated_peak_memory_range: - min: 8777728 - max: 13117240 + min: 8769536 + max: 46392104 primary_compute_unit: GPU precision: int8 layer_info: @@ -146,7 +146,7 @@ models: layers_on_gpu: 126 layers_on_cpu: 68 total_layers: 226 - job_id: jygz70w6p + job_id: jmg993jlg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -155,12 +155,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:43.871461Z' - - reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows - manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.871468Z' + timestamp: '2024-06-08T23:32:29Z' diff --git a/qai_hub_models/models/yolov8_det/README.md b/qai_hub_models/models/yolov8_det/README.md index c82afce9..aa52c80d 100644 --- a/qai_hub_models/models/yolov8_det/README.md +++ b/qai_hub_models/models/yolov8_det/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of YOLOv8-Detection can be found [here](https://github.com/ultralytics/ultralytics/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) ## References * [Ultralytics YOLOv8 Docs: Object Detection](https://docs.ultralytics.com/tasks/detect/) diff --git a/qai_hub_models/models/yolov8_det/export.py b/qai_hub_models/models/yolov8_det/export.py index 222ace93..b8418123 100644 --- a/qai_hub_models/models/yolov8_det/export.py +++ b/qai_hub_models/models/yolov8_det/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/yolov8_det/info.yaml b/qai_hub_models/models/yolov8_det/info.yaml index 2ede9f32..303126cd 100644 --- a/qai_hub_models/models/yolov8_det/info.yaml +++ b/qai_hub_models/models/yolov8_det/info.yaml @@ -37,3 +37,4 @@ has_animated_banner: yes license_type: agpl-3.0 deploy_license_type: agpl-3.0 dataset: [] +labels_file: coco_labels.txt diff --git a/qai_hub_models/models/yolov8_det/perf.yaml b/qai_hub_models/models/yolov8_det/perf.yaml index 6147efd8..79f7e29d 100644 --- a/qai_hub_models/models/yolov8_det/perf.yaml +++ b/qai_hub_models/models/yolov8_det/perf.yaml @@ -36,11 +36,11 @@ models: - name: YOLOv8-Detection performance_metrics: - torchscript_onnx_tflite: - inference_time: 5881.0 - throughput: 170.03910899506886 + inference_time: 5900.0 + throughput: 169.4915254237288 estimated_peak_memory_range: - min: 249856 - max: 3086376 + min: 40960 + max: 11760568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jegnewomg + job_id: j0pxe7n35 job_status: Passed torchscript_onnx_qnn: - inference_time: 5194.0 - throughput: 192.52984212552946 + inference_time: 5248.0 + throughput: 190.5487804878049 estimated_peak_memory_range: - min: 4935680 - max: 19454832 + min: 4919296 + max: 17813040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jqpydyq4p + job_id: jep2370rg job_status: Passed torchscript_onnx_ort: - inference_time: 6201.0 - throughput: 161.26431220770843 + inference_time: 6498.0 + throughput: 153.8935056940597 estimated_peak_memory_range: - min: 4210688 - max: 36287088 + min: 8409088 + max: 39812256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jn5q2dzm5 + job_id: jogkrl7w5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.896469Z' + timestamp: '2024-06-08T23:33:07Z' - torchscript_onnx_tflite: - inference_time: 4124.0 - throughput: 242.48302618816683 + inference_time: 4177.0 + throughput: 239.40627244433804 estimated_peak_memory_range: - min: 49152 - max: 86003712 + min: 16384 + max: 87350704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jopry7oeg + job_id: jo5mvwqd5 job_status: Passed torchscript_onnx_qnn: - 
inference_time: 3684.0 - throughput: 271.4440825190011 + inference_time: 3699.0 + throughput: 270.3433360367667 estimated_peak_memory_range: min: 4931584 - max: 105033424 + max: 104903584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: j2p0rxdep + job_id: jqpyv4r8p job_status: Passed torchscript_onnx_ort: - inference_time: 4667.0 - throughput: 214.27040925648168 + inference_time: 4564.0 + throughput: 219.10604732690624 estimated_peak_memory_range: - min: 4222976 - max: 68052736 + min: 7028736 + max: 68265872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: j1glkqolp + job_id: jn5q97enp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.896588Z' + timestamp: '2024-06-08T23:33:08Z' - torchscript_onnx_tflite: - inference_time: 5889.0 - throughput: 169.80811682798438 + inference_time: 5907.0 + throughput: 169.29067208396816 estimated_peak_memory_range: - min: 258048 - max: 2622624 + min: 245760 + max: 2242704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jep2mz4m5 + job_id: jopr1480g job_status: Passed torchscript_onnx_qnn: - inference_time: 5214.0 - throughput: 191.79133103183736 + inference_time: 5193.0 + throughput: 192.56691700365877 estimated_peak_memory_range: - min: 4931584 - max: 19569632 + min: 4947968 + max: 19559888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jogkykoop + job_id: j1p8w30kp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.896665Z' + timestamp: '2024-06-08T23:33:06Z' - torchscript_onnx_qnn: - inference_time: 5796.0 - throughput: 172.5327812284334 + inference_time: 5771.0 + throughput: 173.28019407381737 estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: j1p87k685 + job_id: j2p0e1395 job_status: Passed torchscript_onnx_ort: - inference_time: 6424.0 - throughput: 155.6662515566625 + inference_time: 6381.0 + throughput: 156.7152483936687 estimated_peak_memory_range: - min: 8925184 - max: 8925184 + min: 10723328 + max: 10723328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jw5610r7p + job_id: j1gle06jp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.896740Z' + timestamp: '2024-06-08T23:33:09Z' diff --git a/qai_hub_models/models/yolov8_det_quantized/README.md b/qai_hub_models/models/yolov8_det_quantized/README.md index 874a00c7..75da973b 100644 --- a/qai_hub_models/models/yolov8_det_quantized/README.md +++ b/qai_hub_models/models/yolov8_det_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. 
## License - The license for the original implementation of YOLOv8-Detection-Quantized can be found [here](https://github.com/ultralytics/ultralytics/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) ## References * [Ultralytics YOLOv8 Docs: Object Detection](https://docs.ultralytics.com/tasks/detect/) diff --git a/qai_hub_models/models/yolov8_det_quantized/export.py b/qai_hub_models/models/yolov8_det_quantized/export.py index 8d2d1fa1..1c3d53f0 100644 --- a/qai_hub_models/models/yolov8_det_quantized/export.py +++ b/qai_hub_models/models/yolov8_det_quantized/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -225,7 +225,12 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) + parser = export_parser( + model_cls=Model, + supports_qnn=False, + supports_ort=False, + supports_precompiled_ort=False, + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov8_det_quantized/info.yaml b/qai_hub_models/models/yolov8_det_quantized/info.yaml index 09e86fec..5751179f 100644 --- a/qai_hub_models/models/yolov8_det_quantized/info.yaml +++ b/qai_hub_models/models/yolov8_det_quantized/info.yaml @@ -40,3 +40,4 @@ has_animated_banner: yes license_type: agpl-3.0 deploy_license_type: agpl-3.0 dataset: [] +labels_file: coco_labels.txt diff --git a/qai_hub_models/models/yolov8_det_quantized/perf.yaml b/qai_hub_models/models/yolov8_det_quantized/perf.yaml index ef54d1c0..9271d6d2 100644 --- a/qai_hub_models/models/yolov8_det_quantized/perf.yaml +++ b/qai_hub_models/models/yolov8_det_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: YOLOv8-Detection-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 2342.0 - throughput: 426.9854824935952 + inference_time: 2332.0 + throughput: 428.8164665523156 estimated_peak_memory_range: min: 12288 - max: 2396192 + max: 3599048 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,7 +54,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 277 - job_id: jwgov9od5 + job_id: jwgoe1kqp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -63,13 +63,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.931301Z' + timestamp: '2024-06-08T23:33:51Z' - torchscript_onnx_tflite: - inference_time: 1597.0 - throughput: 626.1740763932373 + inference_time: 1594.0 + throughput: 627.3525721455458 estimated_peak_memory_range: min: 12288 - max: 48933824 + max: 49918192 primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +77,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 277 - job_id: j1pvwnemg + job_id: j1pvz1rkg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -86,13 +86,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.931347Z' + timestamp: '2024-06-08T23:33:52Z' - torchscript_onnx_tflite: - inference_time: 
2340.0 - throughput: 427.35042735042737 + inference_time: 2326.0 + throughput: 429.9226139294927 estimated_peak_memory_range: - min: 16384 - max: 1938968 + min: 12288 + max: 2668824 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,7 +100,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 277 - job_id: j7gjl8o8p + job_id: j7gjk02v5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -109,13 +109,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.931388Z' + timestamp: '2024-06-08T23:33:53Z' - torchscript_onnx_tflite: - inference_time: 7122.0 - throughput: 140.40999719180004 + inference_time: 6463.0 + throughput: 154.7269070091289 estimated_peak_memory_range: - min: 77824 - max: 34152912 + min: 81920 + max: 33931536 primary_compute_unit: NPU precision: int8 layer_info: @@ -123,7 +123,7 @@ models: layers_on_gpu: 1 layers_on_cpu: 1 total_layers: 277 - job_id: jlpevn805 + job_id: jlpe4rwo5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -132,13 +132,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:43.931428Z' + timestamp: '2024-06-08T23:33:54Z' - torchscript_onnx_tflite: - inference_time: 46687.0 - throughput: 21.419238760254462 + inference_time: 46343.0 + throughput: 21.57823187967978 estimated_peak_memory_range: - min: 2846720 - max: 17832816 + min: 1802240 + max: 10846104 primary_compute_unit: NPU precision: int8 layer_info: @@ -146,7 +146,7 @@ models: layers_on_gpu: 2 layers_on_cpu: 1 total_layers: 277 - job_id: jygz7086p + job_id: jygzvxjop job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -155,12 +155,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:43.931467Z' - - reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows - manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.931474Z' + timestamp: '2024-06-08T23:33:55Z' diff --git a/qai_hub_models/models/yolov8_seg/README.md b/qai_hub_models/models/yolov8_seg/README.md index 518fab1f..75df2424 100644 --- a/qai_hub_models/models/yolov8_seg/README.md +++ b/qai_hub_models/models/yolov8_seg/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of YOLOv8-Segmentation can be found [here](https://github.com/ultralytics/ultralytics/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) ## References * [Ultralytics YOLOv8 Docs: Instance Segmentation](https://docs.ultralytics.com/tasks/segment/) diff --git a/qai_hub_models/models/yolov8_seg/export.py b/qai_hub_models/models/yolov8_seg/export.py index 4156e8c5..d2ecb2c9 100644 --- a/qai_hub_models/models/yolov8_seg/export.py +++ b/qai_hub_models/models/yolov8_seg/export.py @@ -33,6 +33,7 @@ can_access_qualcomm_ai_hub, export_without_hub_access, transpose_channel_first_to_last, + transpose_channel_last_to_first, ) @@ -123,7 +124,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( - " --force_channel_last_input image" + " --force_channel_last_input image" + " --force_channel_last_output output_4" if target_runtime != TargetRuntime.ORT else "" ) @@ -188,7 +189,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -207,6 +208,14 @@ def export_model( torch_out = torch_inference(model, sample_inputs) assert inference_job is not None and inference_job.wait().success inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + # Convert outputs from channel last to channel first + inference_result = ( + inference_result + if target_runtime == TargetRuntime.ORT + else transpose_channel_last_to_first( + "output_4", inference_result, target_runtime + ) + ) print_inference_metrics( inference_job, inference_result, torch_out, outputs_to_skip=[3] ) @@ -219,7 +228,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov8_seg/perf.yaml b/qai_hub_models/models/yolov8_seg/perf.yaml index 5b299571..b39496c1 100644 --- a/qai_hub_models/models/yolov8_seg/perf.yaml +++ b/qai_hub_models/models/yolov8_seg/perf.yaml @@ -36,11 +36,11 @@ models: - name: YOLOv8-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 7371.0 - throughput: 135.666802333469 + inference_time: 7329.0 + throughput: 136.4442625187611 estimated_peak_memory_range: min: 4210688 - max: 6944112 + max: 6975488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: j0px1zd1g + job_id: jo5mvw6d5 job_status: Passed torchscript_onnx_ort: - inference_time: 7864.0 - throughput: 127.1617497456765 + inference_time: 7942.0 + throughput: 125.91286829513976 estimated_peak_memory_range: - min: 15581184 - max: 42389248 + min: 14696448 + max: 42029952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 336 - job_id: j1p87k4x5 + job_id: jogkrlqw5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: 
Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.956437Z' + timestamp: '2024-06-08T23:34:30Z' - torchscript_onnx_tflite: - inference_time: 5327.0 - throughput: 187.72292096865027 + inference_time: 5452.0 + throughput: 183.41892883345562 estimated_peak_memory_range: - min: 16384 - max: 94579136 + min: 3268608 + max: 101106816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: jo5mzldwp + job_id: jegnr9mk5 job_status: Passed torchscript_onnx_ort: - inference_time: 5666.0 - throughput: 176.49135192375573 + inference_time: 5339.0 + throughput: 187.30099269526127 estimated_peak_memory_range: - min: 18112512 - max: 81621696 + min: 16973824 + max: 81417296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 336 - job_id: jogkyk92p + job_id: jn5q97rnp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.956525Z' + timestamp: '2024-06-08T23:34:31Z' - torchscript_onnx_tflite: - inference_time: 7372.0 - throughput: 135.6483993488877 + inference_time: 7404.0 + throughput: 135.06212857914642 estimated_peak_memory_range: - min: 4579328 - max: 14344968 + min: 4583424 + max: 7403760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: jegnew7rg + job_id: jopr1420g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.956572Z' + timestamp: '2024-06-08T23:34:25Z' - torchscript_onnx_ort: - inference_time: 7723.0 - throughput: 129.48336138806164 + inference_time: 7762.0 + throughput: 128.83277505797474 estimated_peak_memory_range: - min: 22294528 - max: 22294528 + min: 22315008 + max: 22315008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 336 - job_id: jn5q2dm45 + job_id: j1gle02jp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.956618Z' + timestamp: '2024-06-08T23:34:32Z' diff --git a/qai_hub_models/requirements-dev.txt b/qai_hub_models/requirements-dev.txt index e1b0706d..bf5cabdd 100644 --- a/qai_hub_models/requirements-dev.txt +++ b/qai_hub_models/requirements-dev.txt @@ -1,5 +1,5 @@ -boto3==1.34.40 -botocore==1.34.40 +boto3==1.34.119 +botocore==1.34.119 coverage==5.3.1 imageio[ffmpeg]==2.31.5 jinja2==3.0.3 @@ -15,4 +15,4 @@ types-PyYAML==6.0.12.12 types-pillow==10.2.0.20240213 types-tabulate==0.9.0.20240106 types-requests==2.31.0.6 -keyrings.envvars; python_version >= '3.9' # used only by CI +keyrings.envvars==1.1.0; python_version >= '3.9' # used only by CI diff --git a/qai_hub_models/utils/aimet/config_loader.py b/qai_hub_models/utils/aimet/config_loader.py index dadc6012..6fbc15b0 100644 --- a/qai_hub_models/utils/aimet/config_loader.py +++ b/qai_hub_models/utils/aimet/config_loader.py @@ -21,6 +21,11 @@ def get_default_aimet_config() -> str: return str(path.resolve()) +def get_default_per_tensor_aimet_config() -> str: + path = Path(__file__).parent / "default_per_tensor_config.json" + return str(path.resolve()) + + def 
get_aimet_config_path(name: str) -> str: path = Path(__file__).parent / f"{name}.json" return str(path.resolve()) diff --git a/qai_hub_models/utils/aimet/default_per_tensor_config.json b/qai_hub_models/utils/aimet/default_per_tensor_config.json new file mode 100644 index 00000000..fda59a9d --- /dev/null +++ b/qai_hub_models/utils/aimet/default_per_tensor_config.json @@ -0,0 +1,88 @@ +{ + "defaults": + { + "ops": + { + "is_output_quantized": "True" + }, + "params": + { + "is_quantized": "True", + "is_symmetric": "True" + }, + "strict_symmetric": "False", + "unsigned_symmetric": "False", + "per_channel_quantization": "False" + }, + + "params": + { + "bias": + { + "is_quantized": "False" + } + }, + + "op_type": + { + "Squeeze": + { + "is_output_quantized": "True" + }, + "Pad": + { + "is_output_quantized": "True" + }, + "Mean": + { + "is_output_quantized": "False" + }, + "Gemm": + { + "per_channel_quantization": "False" + }, + "Sigmoid": + { + "encoding_constraints": + { + "min": 0.0, + "max": 0.99609375 + } + }, + "Softmax": + { + "encoding_constraints": + { + "min": 0.0, + "max": 0.99609375 + } + } + }, + + "supergroups": + [ + { + "op_list": ["Conv", "Relu"] + }, + { + "op_list": ["Conv", "Clip"] + }, + { + "op_list": ["Conv", "BatchNormalization", "Relu"] + }, + { + "op_list": ["Add", "Relu"] + }, + { + "op_list": ["Gemm", "Relu"] + } + ], + + "model_input": + { + "is_input_quantized": "True" + }, + + "model_output": + {} +} diff --git a/qai_hub_models/utils/args.py b/qai_hub_models/utils/args.py index fa2aaefb..184ab87f 100644 --- a/qai_hub_models/utils/args.py +++ b/qai_hub_models/utils/args.py @@ -16,13 +16,15 @@ from typing import Any, List, Mapping, Optional, Set, Type import qai_hub as hub +from qai_hub.client import APIException, UserError from qai_hub_models.models.protocols import ( FromPrecompiledTypeVar, FromPretrainedProtocol, FromPretrainedTypeVar, + HubModelProtocolTypeVar, ) -from qai_hub_models.utils.base_model import BaseModel, InputSpec, TargetRuntime +from qai_hub_models.utils.base_model import BaseModel, TargetRuntime from qai_hub_models.utils.inference import HubModel, compile_model_from_args from qai_hub_models.utils.qai_hub_helpers import can_access_qualcomm_ai_hub @@ -35,7 +37,7 @@ def __init__(self, option_strings, dest, enum_type, **kwargs): self.enum_type = enum_type def __call__(self, parser, namespace, values, option_string=None): - setattr(namespace, self.dest, self.enum_type[values.upper()]) + setattr(namespace, self.dest, self.enum_type[values.upper().replace("-", "_")]) def get_parser() -> argparse.ArgumentParser: @@ -79,7 +81,7 @@ def add_target_runtime_arg( type=str, action=partial(ParseEnumAction, enum_type=TargetRuntime), # type: ignore default=default, - choices=[rt.name.lower() for rt in available_target_runtimes], + choices=[rt.name.lower().replace("_", "-") for rt in available_target_runtimes], help=help, ) return parser @@ -310,13 +312,14 @@ def demo_model_from_cli_args( inference_options=cli_args.inference_options, ) print(f"Exported asset: {model_id}\n") + else: inference_model = model_from_cli_args(model_cls, cli_args) return inference_model def get_input_spec_kwargs( - model: "BaseModel", args_dict: Mapping[str, Any] + model: Type[HubModelProtocolTypeVar], args_dict: Mapping[str, Any] ) -> Mapping[str, Any]: """ Given a dict with many args, pull out the ones relevant @@ -363,24 +366,33 @@ def get_model_input_spec_parser( def input_spec_from_cli_args( - model: "BaseModel", cli_args: argparse.Namespace -) -> "InputSpec": + model: 
Type[HubModelProtocolTypeVar], cli_args: argparse.Namespace +) -> hub.InputSpecs: """ Create this model's input spec from an argparse namespace. Default behavior is to assume the CLI args have the same names as get_input_spec method args. + Also, fetches shapes if demo is run on-device. """ + + is_on_device = "on_device" in cli_args and cli_args.on_device + if is_on_device and isinstance(model, HubModel): + assert isinstance(model.model.producer, hub.CompileJob) + return model.model.producer.shapes return model.get_input_spec(**get_input_spec_kwargs(model, vars(cli_args))) def get_qcom_chipsets() -> Set[str]: - return set( - [ - attr[len("chipset:") :] - for dev in hub.get_devices() - for attr in dev.attributes - if attr.startswith("chipset:qualcomm") - ] - ) + try: + return set( + [ + attr[len("chipset:") :] + for dev in hub.get_devices() + for attr in dev.attributes + if attr.startswith("chipset:qualcomm") + ] + ) + except (APIException, UserError): + return set([]) def _evaluate_export_common_parser( @@ -388,28 +400,14 @@ def _evaluate_export_common_parser( supports_tflite=True, supports_qnn=True, supports_ort=True, + supports_precompiled_ort=True, default_runtime=TargetRuntime.TFLITE, exporting_compiled_model=False, - default_export_device: str = DEFAULT_EXPORT_DEVICE, ) -> argparse.ArgumentParser: """ Common arguments between export and evaluate scripts. """ parser = get_parser() - parser.add_argument( - "--device", - type=str, - default=default_export_device, - help="Device for which to export.", - ) - parser.add_argument( - "--chipset", - type=str, - default=None, - choices=sorted(get_qcom_chipsets(), reverse=True), - help="If set, will choose a random device with this chipset. " - "Overrides whatever is set in --device.", - ) if not exporting_compiled_model: # Default runtime for compiled model is fixed for given model @@ -420,6 +418,8 @@ def _evaluate_export_common_parser( available_runtimes.append(TargetRuntime.QNN) if supports_ort: available_runtimes.append(TargetRuntime.ORT) + if supports_precompiled_ort: + available_runtimes.append(TargetRuntime.PRECOMPILED_ORT) default_runtime = _get_default_runtime(available_runtimes) add_target_runtime_arg( @@ -460,6 +460,7 @@ def export_parser( supports_tflite: bool = True, supports_qnn: bool = True, supports_ort: bool = True, + supports_precompiled_ort: bool = True, default_runtime: TargetRuntime = TargetRuntime.TFLITE, exporting_compiled_model: bool = False, default_export_device: str = DEFAULT_EXPORT_DEVICE, @@ -479,6 +480,9 @@ def export_parser( supports_ort: Whether ORT export is supported. Default=True. + supports_precompiled_ort: + Whether precompiled ORT (with QNN context binary) export is supported. + Default=True. default_runtime: Which runtime to use as default if not specified in cli args. exporting_compiled_model: True when exporting compiled model. @@ -495,9 +499,23 @@ def export_parser( supports_tflite=supports_tflite, supports_qnn=supports_qnn, supports_ort=supports_ort, + supports_precompiled_ort=supports_precompiled_ort, default_runtime=default_runtime, exporting_compiled_model=exporting_compiled_model, - default_export_device=default_export_device, + ) + parser.add_argument( + "--device", + type=str, + default=default_export_device, + help="Device for which to export.", + ) + parser.add_argument( + "--chipset", + type=str, + default=None, + choices=sorted(get_qcom_chipsets(), reverse=True), + help="If set, will choose a random device with this chipset. 
" + "Overrides whatever is set in --device.", ) parser.add_argument( "--skip-profiling", @@ -579,6 +597,13 @@ def evaluate_parser( supports_ort=supports_ort, default_runtime=default_runtime, ) + parser.add_argument( + "--chipset", + type=str, + default="qualcomm-snapdragon-8gen2", + choices=sorted(get_qcom_chipsets(), reverse=True), + help="Which chipset to use to run evaluation.", + ) parser.add_argument( "--split-size", type=int, diff --git a/qai_hub_models/utils/asset_loaders.py b/qai_hub_models/utils/asset_loaders.py index 5f8917d0..adf4b2bc 100644 --- a/qai_hub_models/utils/asset_loaders.py +++ b/qai_hub_models/utils/asset_loaders.py @@ -72,6 +72,20 @@ def set_log_level(log_level: int): logger.setLevel(old_level) +@contextmanager +def tmp_os_env(env_values: Dict[str, str]): + """ + Creates a context where the os environment variables are replaced with + the given values. After exiting the context, the previous env is restored. + """ + previous_env = os.environ.copy() + try: + os.environ.update(env_values) + yield + finally: + os.environ = previous_env # type: ignore + + def _query_yes_no(question, default="yes"): """ Ask a yes/no question and return their answer. @@ -364,6 +378,7 @@ def __init__( dataset_asset_folder: str, local_store_path: str, qaihm_repo: str, + labels_path: str, example_use: str, huggingface_path: str, repo_url: str, @@ -378,6 +393,7 @@ def __init__( self.model_asset_folder = model_asset_folder self.dataset_asset_folder = dataset_asset_folder self.qaihm_repo = qaihm_repo + self.labels_path = labels_path self.example_use = example_use self.huggingface_path = huggingface_path self.repo_url = repo_url @@ -402,12 +418,7 @@ def get_web_asset_url(self, model_id: str, type: QAIHM_WEB_ASSET): return ( f"{self.asset_url.rstrip('/')}/" + ( - Path( - ModelZooAssetConfig._replace_path_keywords( - self.web_asset_folder.lstrip("/"), model_id=model_id - ) - ) - / file + Path(self.web_asset_folder.lstrip("/").format(model_id=model_id)) / file ).as_posix() ) @@ -432,8 +443,8 @@ def get_relative_model_asset_path( self, model_id: str, version: Union[int, str], file_name: Path | str ) -> Path: return Path( - ModelZooAssetConfig._replace_path_keywords( - self.model_asset_folder.lstrip("/"), model_id=model_id, version=version + self.model_asset_folder.lstrip("/").format( + model_id=model_id, version=version ) ) / Path(file_name) @@ -441,10 +452,8 @@ def get_relative_dataset_asset_path( self, dataset_id: str, version: Union[int, str], file_name: Path | str ) -> Path: return Path( - ModelZooAssetConfig._replace_path_keywords( - self.dataset_asset_folder.lstrip("/"), - dataset_id=dataset_id, - version=version, + self.dataset_asset_folder.lstrip("/").format( + dataset_id=dataset_id, version=version ) ) / Path(file_name) @@ -465,48 +474,25 @@ def get_dataset_asset_url( self.get_relative_dataset_asset_path(dataset_id, version, file_name) ) + def get_labels_file_path(self, labels_file: str) -> str: + return self.labels_path.lstrip("/").format(labels_file=labels_file) + def get_qaihm_repo(self, model_id: str, relative=True) -> Path | str: - relative_path = Path( - ModelZooAssetConfig._replace_path_keywords( - self.qaihm_repo.lstrip("/"), model_id=model_id - ) - ) + relative_path = Path(self.qaihm_repo.lstrip("/").format(model_id=model_id)) if not relative: return f"{self.repo_url.rstrip('/')}/{relative_path.as_posix()}" return relative_path def get_website_url(self, model_id: str, relative=False) -> Path | str: relative_path = Path( - ModelZooAssetConfig._replace_path_keywords( - 
self.models_website_relative_path.lstrip("/"), model_id=model_id
-            )
+            self.models_website_relative_path.lstrip("/").format(model_id=model_id)
         )
         if not relative:
             return f"{self.models_website_url.rstrip('/')}/{relative_path.as_posix()}"
         return relative_path

     def get_example_use(self, model_id: str) -> str:
-        return ModelZooAssetConfig._replace_path_keywords(
-            self.example_use.lstrip("/"), model_id=model_id
-        )
-
-    ###
-    # Helpers
-    ###
-    @staticmethod
-    def _replace_path_keywords(
-        path: str,
-        model_id: Optional[str] = None,
-        dataset_id: Optional[str] = None,
-        version: Optional[Union[int, str]] = None,
-    ):
-        if model_id:
-            path = path.replace("{model_id}", model_id)
-        if dataset_id:
-            path = path.replace("{dataset_id}", dataset_id)
-        if version:
-            path = path.replace("{version}", str(version))
-        return path
+        return self.example_use.lstrip("/").format(model_id=model_id)

     ###
     # Load from CFG
@@ -531,6 +517,7 @@ def from_cfg(
             asset_cfg["dataset_asset_folder"],
             local_store_path,
             asset_cfg["qaihm_repo"],
+            asset_cfg["labels_path"],
             asset_cfg["example_use"],
             asset_cfg["huggingface_path"],
             asset_cfg["repo_url"],
@@ -548,6 +535,7 @@ def from_cfg(
                 "animated_web_banner_filename": str,
                 "model_asset_folder": str,
                 "qaihm_repo": str,
+                "labels_path": str,
                 "example_use": str,
                 "huggingface_path": str,
                 "repo_url": str,
diff --git a/qai_hub_models/utils/base_model.py b/qai_hub_models/utils/base_model.py
index b2a55154..377fc357 100644
--- a/qai_hub_models/utils/base_model.py
+++ b/qai_hub_models/utils/base_model.py
@@ -178,6 +178,8 @@ def get_hub_compile_options(
             target_runtime_flag = "onnx"
         elif target_runtime == TargetRuntime.TFLITE:
             target_runtime_flag = "tflite"
+        elif target_runtime == TargetRuntime.PRECOMPILED_ORT:
+            target_runtime_flag = "compiled_qnn_onnx"
         else:
             raise NotImplementedError()

diff --git a/qai_hub_models/utils/config_loaders.py b/qai_hub_models/utils/config_loaders.py
index 3289b900..0c36432f 100644
--- a/qai_hub_models/utils/config_loaders.py
+++ b/qai_hub_models/utils/config_loaders.py
@@ -227,6 +227,10 @@ def map_to_hf_pipeline_tag(self):
 QNN_PATH = "torchscript_onnx_qnn"


+def bytes_to_mb(num_bytes: int) -> int:
+    return round(num_bytes / (1 << 20))
+
+
 class QAIHMModelPerf:
     """Class to read the perf.yaml and parse it for displaying it on HuggingFace."""

@@ -301,12 +305,8 @@ def get_row(self, skip, summary_list, initial_row, model_type, has_assets=True):
         for summary, name in zip(summary_list, names):
             inf_time = summary["inference_time"]
             inference_time = f"{inf_time / 1000} ms"
-            mem_min = round(
-                summary["estimated_peak_memory_range"]["min"] / 1024 / 1024
-            )
-            mem_max = round(
-                summary["estimated_peak_memory_range"]["max"] / 1024 / 1024
-            )
+            mem_min = bytes_to_mb(summary["estimated_peak_memory_range"]["min"])
+            mem_max = bytes_to_mb(summary["estimated_peak_memory_range"]["max"])
             peak_memory_range = f"{mem_min} - {mem_max} MB"
             if model_type == "tflite":
                 self.tflite_inference_time = inference_time
@@ -501,6 +501,7 @@ def __init__(
         inference_metrics: str,
         additional_readme_section: str,
         skip_example_usage: bool,
+        eval_datasets: List[str],
     ) -> None:
         self.is_aimet = is_aimet
         self.has_on_target_demo = has_on_target_demo
@@ -523,6 +524,7 @@ def __init__(
         self.additional_readme_section = additional_readme_section
         self.skip_export = skip_export
         self.skip_example_usage = skip_example_usage
+        self.eval_datasets = eval_datasets

     def validate(self) -> Tuple[bool, Optional[str]]:
         """Returns false with a reason if the info spec for this model is not valid."""
@@ -563,6 +565,7 @@ def
from_yaml( code_gen_config["additional_readme_section"], code_gen_config["skip_export"], code_gen_config["skip_example_usage"], + code_gen_config["eval_datasets"], ) # Schema for code-gen.yaml @@ -591,6 +594,7 @@ def from_yaml( OptionalSchema("additional_readme_section", default=""): str, OptionalSchema("skip_export", default=False): bool, OptionalSchema("skip_example_usage", default=False): bool, + OptionalSchema("eval_datasets", default=[]): list, } ) ) @@ -634,6 +638,7 @@ def __init__( license_type: str, deploy_license_type: str, dataset: List[str], + labels_file: str | None, technical_details: Dict[str, str], ) -> None: self.name = name @@ -652,6 +657,7 @@ def __init__( self.license_type = license_type self.deploy_license_type = deploy_license_type self.dataset = dataset + self.labels_file = labels_file self.source_repo = source_repo self.applicable_scenarios = applicable_scenarios self.related_models = related_models @@ -724,6 +730,11 @@ def validate(self) -> Tuple[bool, Optional[str]]: "`status_reason` in info.yaml should not be set for public models.", ) + # Labels file + if self.labels_file is not None: + if not os.path.exists(ASSET_CONFIG.get_labels_file_path(self.labels_file)): + return False, f"Invalid labels file: {self.labels_file}" + # Required assets exist if self.status == MODEL_STATUS.PUBLIC: if not os.path.exists(self.get_package_path() / "info.yaml"): @@ -775,6 +786,11 @@ def get_demo_path(self): ASSET_CONFIG.get_qaihm_repo(self.id, relative=False), "demo.py" ) + def get_labels_file_path(self): + if self.labels_file is None: + return None + return ASSET_CONFIG.get_labels_file_path(self.labels_file) + def get_info_yaml_path(self, root: Path = QAIHM_PACKAGE_ROOT): return self.get_package_path(root) / "info.yaml" @@ -861,6 +877,7 @@ def from_yaml( info_yaml["license_type"], info_yaml["deploy_license_type"], info_yaml["dataset"], + info_yaml.get("labels_file", None), info_yaml["technical_details"], ) @@ -891,6 +908,7 @@ def from_yaml( "license_type": str, "deploy_license_type": str, "dataset": list, + OptionalSchema("labels_file", default=None): str, } ) ) diff --git a/qai_hub_models/utils/evaluate.py b/qai_hub_models/utils/evaluate.py index 52b8627b..33d8774b 100644 --- a/qai_hub_models/utils/evaluate.py +++ b/qai_hub_models/utils/evaluate.py @@ -303,13 +303,13 @@ def evaluate_on_dataset( compiled_model: hub.Model, torch_model: BaseModel, hub_device: hub.Device, - dataset_name: str = "imagenette", - split_size: int = 2500, - num_samples: int = 100, + dataset_name: str, + split_size: int, + num_samples: int, seed: int = 42, profile_options: str = "", use_cache: bool = False, -) -> None: +) -> Tuple[str, str]: """ Evaluate model accuracy on a dataset both on device and with PyTorch. @@ -327,6 +327,9 @@ def evaluate_on_dataset( use_cache: If set, will upload the full dataset to hub and store a local copy. This prevents re-uploading data to hub for each evaluation, with the tradeoff of increased initial overhead. + + Returns: + Tuple of (torch accuracy, on device accuracy) both as formatted strings. """ assert isinstance(torch_model, EvalModelProtocol), "Model must have an evaluator." 
_validate_inputs(num_samples)
@@ -383,7 +386,10 @@ def evaluate_on_dataset(
             f"Cumulative on device accuracy on batch {i + 1}/{num_batches}: "
             f"{on_device_evaluator.formatted_accuracy()}"
         )
+    torch_accuracy = torch_evaluator.formatted_accuracy()
+    on_device_accuracy = on_device_evaluator.formatted_accuracy()

     print("\nFinal accuracy:")
-    print(f"torch: {torch_evaluator.formatted_accuracy()}")
-    print(f"on-device: {on_device_evaluator.formatted_accuracy()}")
+    print(f"torch: {torch_accuracy}")
+    print(f"on-device: {on_device_accuracy}")
+    return (torch_accuracy, on_device_accuracy)
diff --git a/qai_hub_models/utils/inference.py b/qai_hub_models/utils/inference.py
index e46e4583..6c48856b 100644
--- a/qai_hub_models/utils/inference.py
+++ b/qai_hub_models/utils/inference.py
@@ -212,7 +212,9 @@ def compile_zoo_model_to_hub(


 def compile_model_from_args(
-    model_id: str, cli_args: argparse.Namespace, model_kwargs: Mapping[str, Any]
+    model_id: str,
+    cli_args: argparse.Namespace,
+    model_kwargs: Mapping[str, Any],
 ) -> hub.Model:
     export_file = f"qai_hub_models.models.{model_id}.export"
     export_module = import_module(export_file)
@@ -228,6 +230,7 @@ def compile_model_from_args(
         )
     export_output = export_module.export_model(
         device=cli_args.device,
+        chipset=cli_args.chipset,
         skip_profiling=True,
         skip_inferencing=True,
         skip_downloading=True,
diff --git a/qai_hub_models/utils/printing.py b/qai_hub_models/utils/printing.py
index 95aa9bdc..1074a3d4 100644
--- a/qai_hub_models/utils/printing.py
+++ b/qai_hub_models/utils/printing.py
@@ -14,7 +14,7 @@
 from qai_hub_models.utils.base_model import TargetRuntime
 from qai_hub_models.utils.compare import METRICS_FUNCTIONS, generate_comparison_metrics
-from qai_hub_models.utils.config_loaders import QAIHMModelPerf
+from qai_hub_models.utils.config_loaders import QAIHMModelPerf, bytes_to_mb
 from qai_hub_models.utils.qnn_helpers import is_qnn_hub_model

 _INFO_DASH = "-" * 60
@@ -122,7 +122,7 @@ def print_profile_metrics(
     details: QAIHMModelPerf.ModelRuntimePerformanceDetails,
 ):
     inf_time = details.inference_time_ms
-    peak_memory_mb = f"[{round(details.peak_memory_bytes[0] / 1e6)}, {round(details.peak_memory_bytes[1] / 1e6)}]"
+    peak_memory_mb = f"[{bytes_to_mb(details.peak_memory_bytes[0])}, {bytes_to_mb(details.peak_memory_bytes[1])}]"
     num_ops = sum(details.compute_unit_counts.values())
     compute_units = [
         f"{unit} ({num_ops} ops)"
diff --git a/qai_hub_models/utils/quantization_aimet.py b/qai_hub_models/utils/quantization_aimet.py
index df55cbb0..22a6a29c 100644
--- a/qai_hub_models/utils/quantization_aimet.py
+++ b/qai_hub_models/utils/quantization_aimet.py
@@ -65,6 +65,7 @@ def _should_tie_observers(op: torch.nn.Module) -> bool:
         nn.Upsample,
         aimet_ops.Concat,
         aimet_ops.Interpolate,
+        aimet_ops.MaxPool2d,
     ]
     for op_type in op_types_to_tie:
         if isinstance(wrapped_op, op_type):
@@ -458,43 +459,17 @@ def get_hub_compile_options(
         other_compile_options: str = "",
         device: Optional[Device] = None,
     ) -> str:
-        compile_options = super().get_hub_compile_options(  # type: ignore
-            target_runtime, other_compile_options, device
-        )
-        if target_runtime != TargetRuntime.ORT:
-            # TODO(#10896): Restore quantize_io flag when targeting ORT
-            compile_options = (
-                compile_options + " --quantize_full_type int8 --quantize_io"
+        quantization_flags = " --quantize_io"
+        if target_runtime not in [TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT]:
+            quantization_flags += " --quantize_full_type int8"
+        return (
+            super().get_hub_compile_options(  # type: ignore
+                target_runtime, other_compile_options, device
             )
-        return compile_options
+            + quantization_flags
+        )

     def preferred_hub_source_model_format(
         self, target_runtime: TargetRuntime
     ) -> SourceModelFormat:
         return SourceModelFormat.ONNX
-
-
-def tie_aimet_observer_groups(groups: List[List[Any]]):
-    """
-    Unless you're doing something very customized, you likely want to use
-    the `tie_observers` method instead.
-
-    This defines groups of ops that all should use the same output
-    quantizer observer. The input groups is a list of lists, where the
-    inner lists contain op references that should all use the same output
-    quantizer. Each op should have an `output_quantizers` member.
-
-    Example:
-
-    groups = [
-        [
-            sim.model.net.maxpool2,
-            sim.model.net.Mixed_5b.module_avg_pool2d,
-        ],
-    ]
-    _tie_aimet_observer_groups(groups)
-    """
-    for group in groups:
-        output_quantizer = group[0].output_quantizers[0]
-        for op in group[1:]:
-            op.output_quantizers[0] = output_quantizer
diff --git a/qai_hub_models/utils/scorecard/common.py b/qai_hub_models/utils/scorecard/common.py
index c50b66d9..a8395ec0 100644
--- a/qai_hub_models/utils/scorecard/common.py
+++ b/qai_hub_models/utils/scorecard/common.py
@@ -139,11 +139,6 @@ def get_test_devices(
         return [x for x in devices if x.enabled()] if only_enabled else devices

     def get_compile_options(self, aimet_model=False) -> str:
-        if aimet_model and self.get_runtime() == TargetRuntime.ORT:
-            # TODO(#10896): Restore quantize_io flag to
-            # the default set of flags used to target ORT.
-            # This flag can be removed when that happens.
-            return "--quantize_io"
         return ""

     def get_job_cache_name(
@@ -251,7 +246,7 @@ def get_test_devices(
                 ScorecardDevice.cs_8_gen_3,
                 ScorecardDevice.cs_x_elite,
                 ScorecardDevice.cs_8550,
-            ] + ([ScorecardDevice.cs_6490] if aimet_model else [])
+            ]
         elif self == ScorecardProfilePath.ORT_DML_GPU:
             devices = [ScorecardDevice.cs_x_elite]
         else:
diff --git a/qai_hub_models/utils/scorecard/job_summary.py b/qai_hub_models/utils/scorecard/job_summary.py
index 77c2938a..f9d0b1cd 100644
--- a/qai_hub_models/utils/scorecard/job_summary.py
+++ b/qai_hub_models/utils/scorecard/job_summary.py
@@ -2,6 +2,7 @@
 # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
 # SPDX-License-Identifier: BSD-3-Clause
 # ---------------------------------------------------------------------
+import datetime
 from dataclasses import dataclass
 from functools import cached_property
 from typing import Any, Dict, List, Optional, Type, Union, cast
@@ -105,6 +106,12 @@ def quantized(self) -> str:
             else "No"
         )

+    @cached_property
+    def date(self) -> Optional[datetime.datetime]:
+        if self.job is None:
+            return None
+        return self.job.date
+

 @dataclass
 class CompileJobSummary(JobSummary):
diff --git a/qai_hub_models/utils/scorecard/model_card.py b/qai_hub_models/utils/scorecard/model_card.py
index c812eda1..15849492 100644
--- a/qai_hub_models/utils/scorecard/model_card.py
+++ b/qai_hub_models/utils/scorecard/model_card.py
@@ -4,7 +4,6 @@
 # ---------------------------------------------------------------------
 from __future__ import annotations

-import datetime
 import functools
 import multiprocessing
 import pprint
@@ -93,7 +92,7 @@ def supported_chipsets_santized(chips) -> List[str]:
 __CHIP_SUPPORTED_DEVICES_CACHE: Dict[str, List[str]] = {}


-def supported_devices(chips) -> List[str]:
+def get_supported_devices(chips) -> List[str]:
     """Return all the supported devices given the chipset being used."""
     supported_devices = set(
         [
@@ -110,7 +109,9 @@ def supported_devices(chips) -> List[str]:
         supported_devices_for_chip = __CHIP_SUPPORTED_DEVICES_CACHE.get(chip, list())
         if not supported_devices_for_chip:
             supported_devices_for_chip = [
-                device.name for device in hub.get_devices(attributes=f"chipset:{chip}")
+                device.name
+                for device in hub.get_devices(attributes=f"chipset:{chip}")
+                if "(Family)" not in device.name
             ]
             __CHIP_SUPPORTED_DEVICES_CACHE[chip] = supported_devices_for_chip
         supported_devices.update(supported_devices_for_chip)
@@ -175,6 +176,7 @@ def get_perf_card(
         exclude_paths: Iterable[ScorecardProfilePath] = [],
     ) -> Dict[str, str | Dict[str, str]]:
         perf_card: Dict[str, str | Dict[str, str]] = {}
+        max_date = None
         for path, run in self.run_per_path.items():
             if (
                 not run.skipped  # Skipped runs are not included
@@ -185,8 +187,17 @@ def get_perf_card(
                 )  # exclude failed jobs if requested
             ):
                 perf_card[path.long_name] = run.performance_metrics
+                if max_date is None:
+                    max_date = run.date
+                elif run.date is not None:
+                    max_date = max(max_date, run.date)
+        if not perf_card:
+            return {}
         perf_card["reference_device_info"] = get_reference_device_info(self.device)
-        perf_card["timestamp"] = datetime.datetime.utcnow().isoformat() + "Z"
+        # The timestamp for the device is the latest creation time among the runs
+        # If max_date is still None for some reason, something went wrong
+        assert max_date is not None
+        perf_card["timestamp"] = max_date.isoformat() + "Z"
         return perf_card

     def __repr__(self) -> str:
@@ -225,7 +236,10 @@ def get_perf_card(
     ) -> List[Dict[str, Union[str, Dict[str, str]]]]:
         perf_card = []
         for summary in self.runs_per_device.values():
-            perf_card.append(summary.get_perf_card(include_failed_jobs, exclude_paths))
+            device_summary = summary.get_perf_card(include_failed_jobs, exclude_paths)
+            # If device had no runs, omit it from the card
+            if device_summary:
+                perf_card.append(device_summary)
         return perf_card

     def __repr__(self):
@@ -318,7 +332,7 @@ def get_perf_card(
         chips = self.get_chipsets()
         perf_card["aggregated"] = dict(
             supported_oses=supported_oses(),
-            supported_devices=supported_devices(chips),
+            supported_devices=get_supported_devices(chips),
             supported_chipsets=supported_chipsets_santized(chips),
         )
diff --git a/scripts/build_and_test.py b/scripts/build_and_test.py
index 471bae6c..a5fa7643 100755 --- a/scripts/build_and_test.py +++ b/scripts/build_and_test.py @@ -448,7 +448,6 @@ def test_profile_all_models( def test_all_models_long( self, plan: Plan, step_id: str = "test_all_models_long" ) -> str: - # Includes export tests, and creates a fresh environment for each model. all_models = get_all_models() return plan.add_step( step_id, @@ -458,7 +457,6 @@ def test_all_models_long( all_models, self.venv_path, venv_for_each_model=False, - skip_standard_unit_test=True, use_shared_cache=True, test_trace=False, ), diff --git a/scripts/examples/quantize_imagenet_classifier.py b/scripts/examples/quantize_imagenet_classifier.py index 907e877b..c4abb91f 100644 --- a/scripts/examples/quantize_imagenet_classifier.py +++ b/scripts/examples/quantize_imagenet_classifier.py @@ -22,6 +22,7 @@ ) from qai_hub_models.models.googlenet_quantized.model import GoogLeNetQuantizable from qai_hub_models.models.inception_v3_quantized.model import InceptionNetV3Quantizable +from qai_hub_models.models.midas_quantized.model import MidasQuantizable from qai_hub_models.models.mobilenet_v2_quantized.model import MobileNetV2Quantizable from qai_hub_models.models.mobilenet_v3_large_quantized.model import ( MobileNetV3LargeQuantizable, @@ -37,9 +38,10 @@ from qai_hub_models.models.wideresnet50_quantized.model import WideResNet50Quantizable from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin -CLASSIFIERS = { +MODELS = { "googlenet": GoogLeNetQuantizable, "inception_v3": InceptionNetV3Quantizable, + "midas": MidasQuantizable, "mobilenet_v2": MobileNetV2Quantizable, "mobilenet_v3_large": MobileNetV3LargeQuantizable, "regnet": RegNetQuantizable, @@ -55,6 +57,10 @@ "convnext_tiny_w8a16": ConvNextTinyW8A16Quantizable, } +# These models are quantized by imagenet data, but are not classifiers +# Don't try to compute accuracy for these models +NON_CLASSIFIERS = ["midas"] + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( @@ -82,7 +88,7 @@ "--model", "-m", type=str, - choices=list(CLASSIFIERS.keys()), + choices=list(MODELS.keys()), required=False, help="Name of the model to quantize.", ) @@ -101,17 +107,18 @@ ) args = parser.parse_args() if args.all: - ImageNetClassifier_classes = CLASSIFIERS.values() + ImageNetClassifier_classes = MODELS.values() else: if not hasattr(args, "model"): raise ValueError( "Specify a model via --model or all models via --all" ) - ImageNetClassifier_classes = [CLASSIFIERS[args.model]] + ImageNetClassifier_classes = [MODELS[args.model]] dataset = ImagenetteDataset() torch.manual_seed(args.seed) dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True) + skip_accuracy = args.model in NON_CLASSIFIERS for ImageNetClassifier_cls in ImageNetClassifier_classes: model: AIMETQuantizableMixin = ImageNetClassifier_cls.from_pretrained( @@ -119,24 +126,23 @@ ) print(f"\nQuantizing {ImageNetClassifier_cls.__name__}") - evaluator = model.get_evaluator() - - evaluator.reset() - evaluator.add_from_dataset(model, dataloader, args.num_iter) - accuracy_fp32 = evaluator.get_accuracy_score() + if not skip_accuracy: + evaluator = model.get_evaluator() + evaluator.reset() + evaluator.add_from_dataset(model, dataloader, args.num_iter) + accuracy_fp32 = evaluator.get_accuracy_score() + print(f"FP32 Accuracy: {accuracy_fp32 * 100:.3g}%") model.quantize(dataloader, args.num_iter, data_has_gt=True) - evaluator.reset() - evaluator.add_from_dataset(model, dataloader, args.num_iter) - accuracy_int8 = evaluator.get_accuracy_score() + 
if not skip_accuracy: + evaluator = model.get_evaluator() + evaluator.add_from_dataset(model, dataloader, args.num_iter) + accuracy_int8 = evaluator.get_accuracy_score() - print(f"FP32 Accuracy: {accuracy_fp32 * 100:.3g}%") - print(f"INT8 Accuracy: {accuracy_int8 * 100:.3g}%") + print(f"INT8 Accuracy: {accuracy_int8 * 100:.3g}%") output_path = args.output_dir or str(Path() / "build") - output_name = ( - args.output_name or f"{ImageNetClassifier_cls.__name__}_quantized_encodings" - ) + output_name = args.output_name or f"{args.model}_quantized_encodings" model.quant_sim.save_encodings_to_json(output_path, output_name) - print(f"Wrote {output_path}/{output_name}\n") + print(f"Wrote {output_path}/{output_name}.json\n") diff --git a/scripts/examples/quantize_hrnet.py b/scripts/examples/quantize_pose_detector.py similarity index 74% rename from scripts/examples/quantize_hrnet.py rename to scripts/examples/quantize_pose_detector.py index 835beb6c..01fd9745 100644 --- a/scripts/examples/quantize_hrnet.py +++ b/scripts/examples/quantize_pose_detector.py @@ -14,11 +14,20 @@ from qai_hub_models.datasets.coco import CocoDataset from qai_hub_models.models.hrnet_pose.app import HRNetPoseApp +from qai_hub_models.models.hrnet_pose.model import HRNetPose from qai_hub_models.models.hrnet_pose_quantized.model import HRNetPoseQuantizable +from qai_hub_models.models.posenet_mobilenet_quantized.model import ( + PosenetMobilenetQuantizable, +) + +MODELS = { + "hrnet_pose": HRNetPoseQuantizable, + "posenet_mobilenet": PosenetMobilenetQuantizable, +} # Create custom data loader for this model that does the preprocessing -class HRNetCocoDataset(CocoDataset): +class PoseCocoDataset(CocoDataset): def __init__(self, preprocess_lambda, target_image_size=640): super().__init__(target_image_size) self.preprocess_lambda = preprocess_lambda @@ -41,6 +50,13 @@ def __getitem__(self, item): default=None, help="Directory where encodings should be stored. 
Defaults to ./build.", ) + parser.add_argument( + "--model", + type=str, + choices=MODELS.keys(), + required=True, + help="Name of the model to quantize.", + ) parser.add_argument( "--output-name", type=str, @@ -56,11 +72,13 @@ def __getitem__(self, item): args = parser.parse_args() torch.manual_seed(args.seed) - model = HRNetPoseQuantizable.from_pretrained(aimet_encodings=None) - app = HRNetPoseApp(model) + model_cls = MODELS[args.model] + model = model_cls.from_pretrained(aimet_encodings=None) # Initialize Data Loader - dataset = HRNetCocoDataset(app.preprocess_input) + dataset = PoseCocoDataset( + HRNetPoseApp(HRNetPose.from_pretrained()).preprocess_input + ) # TODO(10491) Add metrics computation here @@ -69,5 +87,5 @@ def __getitem__(self, item): # Export encodings output_path = args.output_dir or str(Path() / "build") - output_name = args.output_name or "hrnet_pose_quantized_encodings" + output_name = args.output_name or f"{args.model}_quantized_encodings" model.quant_sim.save_encodings_to_json(output_path, output_name) diff --git a/scripts/examples/quantize_superresolution.py b/scripts/examples/quantize_superresolution.py index 6807c7d2..055ee17e 100644 --- a/scripts/examples/quantize_superresolution.py +++ b/scripts/examples/quantize_superresolution.py @@ -23,6 +23,7 @@ from qai_hub_models.models.quicksrnetsmall_quantized.model import ( QuickSRNetSmallQuantizable, ) +from qai_hub_models.models.sesr_m5_quantized.model import SESR_M5Quantizable from qai_hub_models.models.xlsr_quantized.model import XLSRQuantizable from qai_hub_models.utils.quantization_aimet import ( # isort: skip @@ -34,6 +35,7 @@ "quicksrnetsmall": QuickSRNetSmallQuantizable, "quicksrnetmedium": QuickSRNetMediumQuantizable, "quicksrnetlarge": QuickSRNetLargeQuantizable, + "sesr_m5": SESR_M5Quantizable, } @@ -77,11 +79,13 @@ parser.add_argument( "--scale-factor", type=int, - default=4, + default=3, help="Scaling factor of the model.", ) args = parser.parse_args() - model = MODELS[args.model].from_pretrained(aimet_encodings=None) + model = MODELS[args.model].from_pretrained( + aimet_encodings=None, scale_factor=args.scale_factor + ) # Load dataset dataset = BSD300Dataset(scaling_factor=args.scale_factor) diff --git a/scripts/tasks/changes.py b/scripts/tasks/changes.py index bebe6068..f4eb73fb 100644 --- a/scripts/tasks/changes.py +++ b/scripts/tasks/changes.py @@ -260,4 +260,14 @@ def get_all_models() -> Iterable[str]: for model_name in os.listdir(PY_PACKAGE_MODELS_ROOT): if os.path.exists(os.path.join(PY_PACKAGE_MODELS_ROOT, model_name, "model.py")): model_names.add(model_name) + + # Select a subset of models based on user input + allowed_models = os.environ.get("QAIHM_TEST_MODELS", None) + if allowed_models and allowed_models.upper() != "ALL": + allowed_models = allowed_models.split(",") + for model in allowed_models: + if model not in model_names: + raise ValueError(f"Unknown model selected: {model}") + model_names = allowed_models + return model_names diff --git a/scripts/tasks/util.py b/scripts/tasks/util.py index 5f6bc438..9625ff3b 100644 --- a/scripts/tasks/util.py +++ b/scripts/tasks/util.py @@ -36,9 +36,7 @@ def new_cd(x): def can_support_aimet(platform: str = sys.platform) -> bool: - return ( - platform == "linux" or platform == "linux2" - ) and sys.version_info.minor == 8 # python 3.8 only + return platform == "linux" or platform == "linux2" def model_needs_aimet(model_name: str) -> bool: