v0.18.0
See https://github.com/quic/ai-hub-models/releases/v0.18.0 for changelog.

Signed-off-by: QAIHM Team <[email protected]>
qaihm-bot committed Nov 13, 2024
1 parent 5dae825 commit 2fc5329
Showing 406 changed files with 26,510 additions and 20,117 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -74,7 +74,7 @@ repos:
       - id: black
         additional_dependencies: ['click==8.0.4']
   - repo: https://github.com/pycqa/flake8
-    rev: 6.1.0
+    rev: 7.1.1
     hooks:
       - id: flake8
   - repo: local
607 changes: 248 additions & 359 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion qai_hub_models/_version.py
@@ -2,4 +2,4 @@
 # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
 # SPDX-License-Identifier: BSD-3-Clause
 # ---------------------------------------------------------------------
-__version__ = "0.17.0"
+__version__ = "0.18.0"
6 changes: 3 additions & 3 deletions qai_hub_models/datasets/__init__.py
@@ -6,7 +6,7 @@

 from .bsd300 import BSD300Dataset
 from .coco import CocoDataset
-from .common import BaseDataset
+from .common import BaseDataset, DatasetSplit
 from .imagenet import ImagenetDataset
 from .imagenette import ImagenetteDataset
 from .pascal_voc import VOCSegmentationDataset
@@ -24,6 +24,6 @@
 }


-def get_dataset_from_name(name: str) -> BaseDataset:
+def get_dataset_from_name(name: str, split: DatasetSplit) -> BaseDataset:
     dataset_cls = DATASET_NAME_MAP[name]
-    return dataset_cls()  # type: ignore
+    return dataset_cls(split=split)  # type: ignore
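This hunk changes the lookup API: every registered dataset class must now accept a split argument. A minimal usage sketch of the new signature, assuming the DatasetSplit enum added to qai_hub_models.datasets.common in this release:

    from qai_hub_models.datasets import get_dataset_from_name
    from qai_hub_models.datasets.common import DatasetSplit

    # "imagenette" is a hypothetical key; DATASET_NAME_MAP's real keys are elided above.
    dataset = get_dataset_from_name("imagenette", split=DatasetSplit.VAL)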
70 changes: 43 additions & 27 deletions qai_hub_models/datasets/bsd300.py
@@ -5,12 +5,13 @@
 from __future__ import annotations

 import os
+from itertools import chain

 import numpy as np
 import torch
 from PIL import Image

-from qai_hub_models.datasets.common import BaseDataset
+from qai_hub_models.datasets.common import BaseDataset, DatasetSplit
 from qai_hub_models.utils.asset_loaders import CachedWebDatasetAsset

 BSD300_URL = (
@@ -21,63 +22,78 @@
 BSD300_ASSET = CachedWebDatasetAsset(
     BSD300_URL, BSD300_FOLDER_NAME, BSD300_VERSION, "BSDS300.tgz"
 )
-DATASET_LENGTH = 200
+NUM_TEST_IMAGES = 100
+NUM_TRAIN_IMAGES = 200


 class BSD300Dataset(BaseDataset):
     """
     BSD300 published here: https://www2.eecs.berkeley.edu/Research/Projects/CS/vision/bsds/
     """

-    def __init__(self, scaling_factor=4):
+    def __init__(
+        self,
+        input_height: int = 128,
+        input_width: int = 128,
+        scaling_factor: int = 4,
+        split: DatasetSplit = DatasetSplit.TRAIN,
+    ):
         self.bsd_path = BSD300_ASSET.path(extracted=True)
-        self.images_path = self.bsd_path / "images" / "train"
-        BaseDataset.__init__(self, self.bsd_path)

+        # bsd300 doesn't have a val split, so use the test split for this purpose
+        split = DatasetSplit.TEST if split == DatasetSplit.VAL else split

+        BaseDataset.__init__(self, self.bsd_path, split)
         self.scaling_factor = scaling_factor
+        self.input_height = input_height
+        self.input_width = input_width
+        self.image_files = sorted(os.listdir(self.images_path))

     def _validate_data(self) -> bool:
         # Check image path exists
+        self.images_path = self.bsd_path / "images" / self.split_str
         if not self.images_path.exists():
             return False

         # Ensure the correct number of images are there
-        images = [f for f in self.images_path.iterdir() if ".jpg" in f.name]
-        if len(images) != DATASET_LENGTH:
+        images = [f for f in self.images_path.iterdir() if ".png" in f.name]
+        expected_num_images = len(self)
+        if len(images) != expected_num_images:
             return False

         return True

     def _prepare_data(self):
-        # Rename images to be more friendly to enumeration
-        # directory = os.path.join(self.dataset_path, "images/train")
-        # files = os.listdir(directory)
-        for i, filepath in enumerate(self.images_path.iterdir()):
+        """Convert jpg to png."""
+        train_path = self.bsd_path / "images" / "train"
+        test_path = self.bsd_path / "images" / "test"
+        for i, filepath in enumerate(chain(train_path.iterdir(), test_path.iterdir())):
             if filepath.name.endswith(".jpg"):
-                # Open the image and convert it to png
-                try:
-                    with Image.open(filepath) as img:
-                        img.save(self.images_path / f"img_{i + 1:03d}_HR.jpg")
-                    # delete the old image
-                    os.remove(filepath)
-                except ValueError:
-                    print(f"File {filepath} does not exist!")
+                with Image.open(filepath) as img:
+                    img.save(filepath.parent / f"img_{i + 1:03d}_HR.png")
+                # delete the old image
+                os.remove(filepath)

     def __len__(self):
-        return DATASET_LENGTH
+        return NUM_TRAIN_IMAGES if self.split_str == "train" else NUM_TEST_IMAGES

     def __getitem__(self, item) -> tuple[torch.Tensor, torch.Tensor]:
         # We use the super resolution GT-and-test image preparation from AIMET zoo:
         # https://github.com/quic/aimet-model-zoo/blob/d09d2b0404d10f71a7640a87e9d5e5257b028802/aimet_zoo_torch/quicksrnet/dataloader/utils.py#L51

-        img = np.asarray(
-            Image.open(os.path.join(self.images_path, f"img_{item + 1:03d}_HR.jpg"))
+        img = Image.open(os.path.join(self.images_path, self.image_files[item]))
+        img = img.resize(
+            (
+                self.input_width * self.scaling_factor,
+                self.input_height * self.scaling_factor,
+            )
         )
-        height, width = img.shape[0:2]
+        img_arr = np.asarray(img)
+        height, width = img_arr.shape[0:2]

         # If portrait, transpose to landscape so that all tensors are equal size
         if height > width:
-            img = np.transpose(img, (1, 0, 2))
-            height, width = img.shape[0:2]
+            img_arr = np.transpose(img_arr, (1, 0, 2))
+            height, width = img_arr.shape[0:2]

         # Take the largest possible center-crop of it such that its dimensions are perfectly divisible by the scaling factor
         x_remainder = width % (
@@ -94,7 +110,7 @@ def __getitem__(self, item) -> tuple[torch.Tensor, torch.Tensor]:
         top = int(y_remainder // 2)
         right = int(left + (width - x_remainder))
         bottom = int(top + (height - y_remainder))
-        hr_img = img[top:bottom, left:right]
+        hr_img = img_arr[top:bottom, left:right]

         hr_height, hr_width = hr_img.shape[0:2]
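For orientation, a minimal usage sketch of the reworked class as implied by this diff. The split counts come from the constants above; treating the first tensor as the low-res input is an assumption, since the hunk only shows a tuple[torch.Tensor, torch.Tensor] return:

    from qai_hub_models.datasets.bsd300 import BSD300Dataset
    from qai_hub_models.datasets.common import DatasetSplit

    train_set = BSD300Dataset(split=DatasetSplit.TRAIN)  # 200 images (NUM_TRAIN_IMAGES)
    val_set = BSD300Dataset(split=DatasetSplit.VAL)      # remapped to TEST, 100 images
    lr, hr = train_set[0]  # (lr, hr) ordering is assumed, not shown in the diff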
159 changes: 99 additions & 60 deletions qai_hub_models/datasets/coco.py
@@ -2,31 +2,21 @@
 # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
 # SPDX-License-Identifier: BSD-3-Clause
 # ---------------------------------------------------------------------
-import os
+from pathlib import Path
 from typing import Union

 import torch
+import torch.nn.functional as F
+from fiftyone.core.sample import SampleView
+from PIL import Image
 from torch.utils.data.dataloader import default_collate
-from torchvision.datasets.coco import CocoDetection

-from qai_hub_models.datasets.common import BaseDataset
-from qai_hub_models.utils.asset_loaders import CachedWebDatasetAsset
+from qai_hub_models.datasets.common import BaseDataset, DatasetSplit, setup_fiftyone_env
 from qai_hub_models.utils.image_processing import app_to_net_image_inputs
+from qai_hub_models.utils.path_helpers import get_qaihm_package_root

-DATASET_ID = "coco"
-DATASET_ASSET_VERSION = 1
-COCO_DATASET = CachedWebDatasetAsset(
-    "http://images.cocodataset.org/zips/val2017.zip",
-    DATASET_ID,
-    DATASET_ASSET_VERSION,
-    "val2017.zip",
-)
-COCO_ANNOTATIONS = CachedWebDatasetAsset(
-    "http://images.cocodataset.org/annotations/annotations_trainval2017.zip",
-    DATASET_ID,
-    DATASET_ASSET_VERSION,
-    "annotations_trainval2017.zip",
-)


def collate_fn(batch):
@@ -45,81 +35,130 @@ def collate_fn(batch):
             new_list.append(target)
         return new_list
     except Exception:
-        return [], ([], [], [], [], [])
+        return [], ([], [], [], [], [], [])


-class CocoDataset(BaseDataset, CocoDetection):
+class CocoDataset(BaseDataset):
     """
-    Class for using the COCODetection dataset published here:
+    Wrapper class around COCO dataset https://cocodataset.org/
+    Contains object detection samples and labels spanning 80 classes.
-    Contains ~5k images spanning 80 classes.
+    This wrapper supports the train and val splits of the 2017 version.
     """

-    def __init__(self, target_image_size: Union[int, tuple[int, int]] = 640):
-        BaseDataset.__init__(self, str(COCO_DATASET.path(extracted=True)))
-        CocoDetection.__init__(
-            self,
-            root=COCO_DATASET.path() / "val2017",
-            annFile=COCO_ANNOTATIONS.path() / "annotations" / "instances_val2017.json",
-        )
+    def __init__(
+        self,
+        target_image_size: Union[int, tuple[int, int]] = 640,
+        split: DatasetSplit = DatasetSplit.TRAIN,
+        max_boxes: int = 100,
+        num_samples: int = 5000,
+    ):
+        """
+        Parameters:
+            target_image_size: The size to which the input images will be resized.
+            split: Whether to use the train or val split of the dataset.
+            max_boxes: The maximum number of boxes for a given sample. Used so that
+                when loading multiple samples in a batch via a dataloader, this will
+                be the tensor dimension.
+                If a sample has fewer than this many boxes, the tensor of boxes
+                will be zero padded up to this amount.
+                If a sample has more than this many boxes, an exception is thrown.
+            num_samples: Number of data samples to download. Needs to be specified
+                during initialization because only as many samples as requested
+                are downloaded.
+        """
+        self.num_samples = num_samples

+        # FiftyOne package manages dataset so pass a dummy name for data path
+        BaseDataset.__init__(self, "non_existent_dir", split)

-        categories = self.coco.loadCats(self.coco.getCatIds())
-        categories.sort(key=lambda x: x["id"])
-        self.label_map = {}
         counter = 0
-        for c in categories:
-            self.label_map[c["id"]] = counter
-            counter += 1
+        self.label_map = {}
+        with open(get_qaihm_package_root() / "labels" / "coco_labels.txt") as f:
+            for line in f.readlines():
+                self.label_map[line.strip()] = counter
+                counter += 1

         self.target_image_size = (
             target_image_size
             if isinstance(target_image_size, tuple)
             else (target_image_size, target_image_size)
         )
+        self.max_boxes = max_boxes

     def __getitem__(self, item):
-        image, target = super().__getitem__(item)
+        """
+        Returns a tuple of input image tensor and label data.
+        Label data is a tuple with the following entries:
+            - Image ID within the original dataset
+            - height (in pixels)
+            - width (in pixels)
+            - bounding box data with shape (self.max_boxes, 4)
+                - The 4 should be normalized (x, y, w, h)
+            - labels with shape (self.max_boxes,)
+            - number of actual boxes present
+        """
+        sample = self.dataset[item : item + 1].first()
+        assert isinstance(sample, SampleView)
+        image = Image.open(sample.filepath).convert("RGB")
         width, height = image.size
         boxes = []
         labels = []
-        for annotation in target:
-            bbox = annotation.get("bbox")
-            boxes.append(
-                [
-                    bbox[0] / width,
-                    bbox[1] / height,
-                    (bbox[0] + bbox[2]) / width,
-                    (bbox[1] + bbox[3]) / height,
-                ]
-            )
-            labels.append(self.label_map[annotation.get("category_id")])
+        if sample.ground_truth is not None:
+            for annotation in sample.ground_truth.detections:
+                if annotation.label not in self.label_map:
+                    print(f"Warning: Invalid label {annotation.label}")
+                    continue
+                x, y, w, h = annotation.bounding_box
+                boxes.append([x, y, x + w, y + h])
+                # Convert string label to int idx
+                labels.append(self.label_map[annotation.label])
         boxes = torch.tensor(boxes)
         labels = torch.tensor(labels)

+        # Pad the number of boxes to a standard value
+        num_boxes = len(labels)
+        if num_boxes == 0:
+            boxes = torch.zeros((100, 4))
+            labels = torch.zeros(100)
+        elif num_boxes > self.max_boxes:
+            raise ValueError(
+                f"Sample has more boxes than max boxes {self.max_boxes}. "
+                "Re-initialize the dataset with a larger value for max_boxes."
+            )
+        else:
+            boxes = F.pad(boxes, (0, 0, 0, self.max_boxes - num_boxes), value=0)
+            labels = F.pad(labels, (0, self.max_boxes - num_boxes), value=0)

         image = image.resize(self.target_image_size)
         image = app_to_net_image_inputs(image)[1].squeeze(0)
         return image, (
-            target[0]["image_id"] if len(target) > 0 else 0,
+            int(Path(sample.filepath).name[:-4]),
             height,
             width,
             boxes,
             labels,
+            torch.tensor([num_boxes]),
         )

-    def _validate_data(self) -> bool:
-        # Check validation data exists
-        if not (COCO_DATASET.path() / "val2017").exists():
-            return False
+    def __len__(self) -> int:
+        return len(self.dataset)

-        # Check annotations exist
-        if not COCO_ANNOTATIONS.path().exists():
-            return False
+    def _validate_data(self) -> bool:
+        return hasattr(self, "dataset")

-        # Ensure there are 5000 samples
-        if len(os.listdir(COCO_DATASET.path() / "val2017")) < 5000:
-            return False
+    def _download_data(self) -> None:
+        setup_fiftyone_env()

-        return True
+        # This is an expensive import, so don't want to unnecessarily import it in
+        # other files that import datasets/__init__.py
+        import fiftyone.zoo as foz

-    def _download_data(self) -> None:
-        COCO_DATASET.fetch(extract=True)
-        COCO_ANNOTATIONS.fetch(extract=True)
+        split_str = "validation" if self.split == DatasetSplit.VAL else "train"
+        self.dataset = foz.load_zoo_dataset(
+            "coco-2017", split=split_str, max_samples=self.num_samples, shuffle=True
+        )
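A short usage sketch implied by the reworked class. This is an assumption-laden illustration: the batch size and num_samples are arbitrary, and the unpacking below assumes collate_fn yields (images, label_tuple) with shapes following the __getitem__ docstring above:

    from torch.utils.data import DataLoader
    from qai_hub_models.datasets.coco import CocoDataset, collate_fn
    from qai_hub_models.datasets.common import DatasetSplit

    # Downloads up to 500 val-split samples through the FiftyOne zoo on first use.
    dataset = CocoDataset(split=DatasetSplit.VAL, max_boxes=100, num_samples=500)
    loader = DataLoader(dataset, batch_size=8, collate_fn=collate_fn)
    images, (image_ids, heights, widths, boxes, labels, num_boxes) = next(iter(loader))
    # boxes is zero-padded to (batch, max_boxes, 4); num_boxes holds the real count.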