v0.18.0
See https://github.com/quic/ai-hub-models/releases/v0.18.0 for changelog.

Signed-off-by: QAIHM Team <[email protected]>
qaihm-bot committed Nov 13, 2024
1 parent 5dae825 commit 2fc5329
Showing 406 changed files with 26,510 additions and 20,117 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -74,7 +74,7 @@ repos:
       - id: black
         additional_dependencies: ['click==8.0.4']
   - repo: https://github.com/pycqa/flake8
-    rev: 6.1.0
+    rev: 7.1.1
     hooks:
       - id: flake8
   - repo: local
607 changes: 248 additions & 359 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion qai_hub_models/_version.py
@@ -2,4 +2,4 @@
 # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
 # SPDX-License-Identifier: BSD-3-Clause
 # ---------------------------------------------------------------------
-__version__ = "0.17.0"
+__version__ = "0.18.0"
6 changes: 3 additions & 3 deletions qai_hub_models/datasets/__init__.py
@@ -6,7 +6,7 @@

 from .bsd300 import BSD300Dataset
 from .coco import CocoDataset
-from .common import BaseDataset
+from .common import BaseDataset, DatasetSplit
 from .imagenet import ImagenetDataset
 from .imagenette import ImagenetteDataset
 from .pascal_voc import VOCSegmentationDataset
@@ -24,6 +24,6 @@
 }


-def get_dataset_from_name(name: str) -> BaseDataset:
+def get_dataset_from_name(name: str, split: DatasetSplit) -> BaseDataset:
     dataset_cls = DATASET_NAME_MAP[name]
-    return dataset_cls()  # type: ignore
+    return dataset_cls(split=split)  # type: ignore
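This hunk changes the lookup API: every registered dataset class must now accept a split argument. A minimal usage sketch of the new signature, assuming the DatasetSplit enum added to qai_hub_models.datasets.common in this release:

    from qai_hub_models.datasets import get_dataset_from_name
    from qai_hub_models.datasets.common import DatasetSplit

    # "imagenette" is a hypothetical key; DATASET_NAME_MAP's real keys are elided above.
    dataset = get_dataset_from_name("imagenette", split=DatasetSplit.VAL)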
70 changes: 43 additions & 27 deletions qai_hub_models/datasets/bsd300.py
@@ -5,12 +5,13 @@
 from __future__ import annotations

 import os
+from itertools import chain

 import numpy as np
 import torch
 from PIL import Image

-from qai_hub_models.datasets.common import BaseDataset
+from qai_hub_models.datasets.common import BaseDataset, DatasetSplit
 from qai_hub_models.utils.asset_loaders import CachedWebDatasetAsset

 BSD300_URL = (
@@ -21,63 +22,78 @@
 BSD300_ASSET = CachedWebDatasetAsset(
     BSD300_URL, BSD300_FOLDER_NAME, BSD300_VERSION, "BSDS300.tgz"
 )
-DATASET_LENGTH = 200
+NUM_TEST_IMAGES = 100
+NUM_TRAIN_IMAGES = 200


 class BSD300Dataset(BaseDataset):
     """
     BSD300 published here: https://www2.eecs.berkeley.edu/Research/Projects/CS/vision/bsds/
     """

-    def __init__(self, scaling_factor=4):
+    def __init__(
+        self,
+        input_height: int = 128,
+        input_width: int = 128,
+        scaling_factor: int = 4,
+        split: DatasetSplit = DatasetSplit.TRAIN,
+    ):
         self.bsd_path = BSD300_ASSET.path(extracted=True)
-        self.images_path = self.bsd_path / "images" / "train"
-        BaseDataset.__init__(self, self.bsd_path)

+        # bsd300 doesn't have a val split, so use the test split for this purpose
+        split = DatasetSplit.TEST if split == DatasetSplit.VAL else split

+        BaseDataset.__init__(self, self.bsd_path, split)
         self.scaling_factor = scaling_factor
+        self.input_height = input_height
+        self.input_width = input_width
+        self.image_files = sorted(os.listdir(self.images_path))

     def _validate_data(self) -> bool:
         # Check image path exists
+        self.images_path = self.bsd_path / "images" / self.split_str
         if not self.images_path.exists():
             return False

         # Ensure the correct number of images are there
-        images = [f for f in self.images_path.iterdir() if ".jpg" in f.name]
-        if len(images) != DATASET_LENGTH:
+        images = [f for f in self.images_path.iterdir() if ".png" in f.name]
+        expected_num_images = len(self)
+        if len(images) != expected_num_images:
             return False

         return True

     def _prepare_data(self):
-        # Rename images to be more friendly to enumeration
-        # directory = os.path.join(self.dataset_path, "images/train")
-        # files = os.listdir(directory)
-        for i, filepath in enumerate(self.images_path.iterdir()):
+        """Convert jpg to png."""
+        train_path = self.bsd_path / "images" / "train"
+        test_path = self.bsd_path / "images" / "test"
+        for i, filepath in enumerate(chain(train_path.iterdir(), test_path.iterdir())):
             if filepath.name.endswith(".jpg"):
-                # Open the image and convert it to png
-                try:
-                    with Image.open(filepath) as img:
-                        img.save(self.images_path / f"img_{i + 1:03d}_HR.jpg")
-                    # delete the old image
-                    os.remove(filepath)
-                except ValueError:
-                    print(f"File {filepath} does not exist!")
+                with Image.open(filepath) as img:
+                    img.save(filepath.parent / f"img_{i + 1:03d}_HR.png")
+                # delete the old image
+                os.remove(filepath)

     def __len__(self):
-        return DATASET_LENGTH
+        return NUM_TRAIN_IMAGES if self.split_str == "train" else NUM_TEST_IMAGES

     def __getitem__(self, item) -> tuple[torch.Tensor, torch.Tensor]:
         # We use the super resolution GT-and-test image preparation from AIMET zoo:
         # https://github.com/quic/aimet-model-zoo/blob/d09d2b0404d10f71a7640a87e9d5e5257b028802/aimet_zoo_torch/quicksrnet/dataloader/utils.py#L51

-        img = np.asarray(
-            Image.open(os.path.join(self.images_path, f"img_{item + 1:03d}_HR.jpg"))
+        img = Image.open(os.path.join(self.images_path, self.image_files[item]))
+        img = img.resize(
+            (
+                self.input_width * self.scaling_factor,
+                self.input_height * self.scaling_factor,
+            )
         )
-        height, width = img.shape[0:2]
+        img_arr = np.asarray(img)
+        height, width = img_arr.shape[0:2]

         # If portrait, transpose to landscape so that all tensors are equal size
         if height > width:
-            img = np.transpose(img, (1, 0, 2))
-            height, width = img.shape[0:2]
+            img_arr = np.transpose(img_arr, (1, 0, 2))
+            height, width = img_arr.shape[0:2]

         # Take the largest possible center-crop of it such that its dimensions are perfectly divisible by the scaling factor
         x_remainder = width % (
@@ -94,7 +110,7 @@ def __getitem__(self, item) -> tuple[torch.Tensor, torch.Tensor]:
         top = int(y_remainder // 2)
         right = int(left + (width - x_remainder))
         bottom = int(top + (height - y_remainder))
-        hr_img = img[top:bottom, left:right]
+        hr_img = img_arr[top:bottom, left:right]

         hr_height, hr_width = hr_img.shape[0:2]
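For orientation, a minimal usage sketch of the reworked class as implied by this diff. The split counts come from the constants above; treating the first tensor as the low-res input is an assumption, since the hunk only shows a tuple[torch.Tensor, torch.Tensor] return:

    from qai_hub_models.datasets.bsd300 import BSD300Dataset
    from qai_hub_models.datasets.common import DatasetSplit

    train_set = BSD300Dataset(split=DatasetSplit.TRAIN)  # 200 images (NUM_TRAIN_IMAGES)
    val_set = BSD300Dataset(split=DatasetSplit.VAL)      # remapped to TEST, 100 images
    lr, hr = train_set[0]  # (lr, hr) ordering is assumed, not shown in the diff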
159 changes: 99 additions & 60 deletions qai_hub_models/datasets/coco.py
@@ -2,31 +2,21 @@
 # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
 # SPDX-License-Identifier: BSD-3-Clause
 # ---------------------------------------------------------------------
-import os
+from pathlib import Path
 from typing import Union

 import torch
+import torch.nn.functional as F
+from fiftyone.core.sample import SampleView
+from PIL import Image
 from torch.utils.data.dataloader import default_collate
-from torchvision.datasets.coco import CocoDetection

-from qai_hub_models.datasets.common import BaseDataset
-from qai_hub_models.utils.asset_loaders import CachedWebDatasetAsset
+from qai_hub_models.datasets.common import BaseDataset, DatasetSplit, setup_fiftyone_env
 from qai_hub_models.utils.image_processing import app_to_net_image_inputs
+from qai_hub_models.utils.path_helpers import get_qaihm_package_root

-DATASET_ID = "coco"
-DATASET_ASSET_VERSION = 1
-COCO_DATASET = CachedWebDatasetAsset(
-    "http://images.cocodataset.org/zips/val2017.zip",
-    DATASET_ID,
-    DATASET_ASSET_VERSION,
-    "val2017.zip",
-)
-COCO_ANNOTATIONS = CachedWebDatasetAsset(
-    "http://images.cocodataset.org/annotations/annotations_trainval2017.zip",
-    DATASET_ID,
-    DATASET_ASSET_VERSION,
-    "annotations_trainval2017.zip",
-)


def collate_fn(batch):
@@ -45,81 +35,130 @@ def collate_fn(batch):
             new_list.append(target)
         return new_list
     except Exception:
-        return [], ([], [], [], [], [])
+        return [], ([], [], [], [], [], [])


-class CocoDataset(BaseDataset, CocoDetection):
+class CocoDataset(BaseDataset):
     """
-    Class for using the COCODetection dataset published here:
+    Wrapper class around COCO dataset https://cocodataset.org/
+    Contains object detection samples and labels spanning 80 classes.
-    Contains ~5k images spanning 80 classes.
+    This wrapper supports the train and val splits of the 2017 version.
     """

-    def __init__(self, target_image_size: Union[int, tuple[int, int]] = 640):
-        BaseDataset.__init__(self, str(COCO_DATASET.path(extracted=True)))
-        CocoDetection.__init__(
-            self,
-            root=COCO_DATASET.path() / "val2017",
-            annFile=COCO_ANNOTATIONS.path() / "annotations" / "instances_val2017.json",
-        )
+    def __init__(
+        self,
+        target_image_size: Union[int, tuple[int, int]] = 640,
+        split: DatasetSplit = DatasetSplit.TRAIN,
+        max_boxes: int = 100,
+        num_samples: int = 5000,
+    ):
+        """
+        Parameters:
+            target_image_size: The size to which the input images will be resized.
+            split: Whether to use the train or val split of the dataset.
+            max_boxes: The maximum number of boxes for a given sample. Used so that
+                when loading multiple samples in a batch via a dataloader, this will
+                be the tensor dimension.
+                If a sample has fewer than this many boxes, the tensor of boxes
+                will be zero padded up to this amount.
+                If a sample has more than this many boxes, an exception is thrown.
+            num_samples: Number of data samples to download. Needs to be specified
+                during initialization because only as many samples as requested
+                are downloaded.
+        """
+        self.num_samples = num_samples

+        # FiftyOne package manages dataset so pass a dummy name for data path
+        BaseDataset.__init__(self, "non_existent_dir", split)

-        categories = self.coco.loadCats(self.coco.getCatIds())
-        categories.sort(key=lambda x: x["id"])
-        self.label_map = {}
         counter = 0
-        for c in categories:
-            self.label_map[c["id"]] = counter
-            counter += 1
+        self.label_map = {}
+        with open(get_qaihm_package_root() / "labels" / "coco_labels.txt") as f:
+            for line in f.readlines():
+                self.label_map[line.strip()] = counter
+                counter += 1

         self.target_image_size = (
             target_image_size
             if isinstance(target_image_size, tuple)
             else (target_image_size, target_image_size)
         )
+        self.max_boxes = max_boxes

     def __getitem__(self, item):
-        image, target = super().__getitem__(item)
+        """
+        Returns a tuple of input image tensor and label data.
+        Label data is a tuple with the following entries:
+            - Image ID within the original dataset
+            - height (in pixels)
+            - width (in pixels)
+            - bounding box data with shape (self.max_boxes, 4)
+                - The 4 should be normalized (x, y, w, h)
+            - labels with shape (self.max_boxes,)
+            - number of actual boxes present
+        """
+        sample = self.dataset[item : item + 1].first()
+        assert isinstance(sample, SampleView)
+        image = Image.open(sample.filepath).convert("RGB")
         width, height = image.size
         boxes = []
         labels = []
-        for annotation in target:
-            bbox = annotation.get("bbox")
-            boxes.append(
-                [
-                    bbox[0] / width,
-                    bbox[1] / height,
-                    (bbox[0] + bbox[2]) / width,
-                    (bbox[1] + bbox[3]) / height,
-                ]
-            )
-            labels.append(self.label_map[annotation.get("category_id")])
+        if sample.ground_truth is not None:
+            for annotation in sample.ground_truth.detections:
+                if annotation.label not in self.label_map:
+                    print(f"Warning: Invalid label {annotation.label}")
+                    continue
+                x, y, w, h = annotation.bounding_box
+                boxes.append([x, y, x + w, y + h])
+                # Convert string label to int idx
+                labels.append(self.label_map[annotation.label])
         boxes = torch.tensor(boxes)
         labels = torch.tensor(labels)

+        # Pad the number of boxes to a standard value
+        num_boxes = len(labels)
+        if num_boxes == 0:
+            boxes = torch.zeros((100, 4))
+            labels = torch.zeros(100)
+        elif num_boxes > self.max_boxes:
+            raise ValueError(
+                f"Sample has more boxes than max boxes {self.max_boxes}. "
+                "Re-initialize the dataset with a larger value for max_boxes."
+            )
+        else:
+            boxes = F.pad(boxes, (0, 0, 0, self.max_boxes - num_boxes), value=0)
+            labels = F.pad(labels, (0, self.max_boxes - num_boxes), value=0)

         image = image.resize(self.target_image_size)
         image = app_to_net_image_inputs(image)[1].squeeze(0)
         return image, (
-            target[0]["image_id"] if len(target) > 0 else 0,
+            int(Path(sample.filepath).name[:-4]),
             height,
             width,
             boxes,
             labels,
+            torch.tensor([num_boxes]),
         )

-    def _validate_data(self) -> bool:
-        # Check validation data exists
-        if not (COCO_DATASET.path() / "val2017").exists():
-            return False
+    def __len__(self) -> int:
+        return len(self.dataset)

-        # Check annotations exist
-        if not COCO_ANNOTATIONS.path().exists():
-            return False
+    def _validate_data(self) -> bool:
+        return hasattr(self, "dataset")

-        # Ensure there are 5000 samples
-        if len(os.listdir(COCO_DATASET.path() / "val2017")) < 5000:
-            return False
+    def _download_data(self) -> None:
+        setup_fiftyone_env()

-        return True
+        # This is an expensive import, so don't want to unnecessarily import it in
+        # other files that import datasets/__init__.py
+        import fiftyone.zoo as foz

-    def _download_data(self) -> None:
-        COCO_DATASET.fetch(extract=True)
-        COCO_ANNOTATIONS.fetch(extract=True)
+        split_str = "validation" if self.split == DatasetSplit.VAL else "train"
+        self.dataset = foz.load_zoo_dataset(
+            "coco-2017", split=split_str, max_samples=self.num_samples, shuffle=True
+        )
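A short usage sketch implied by the reworked class. This is an assumption-laden illustration: the batch size and num_samples are arbitrary, and the unpacking below assumes collate_fn yields (images, label_tuple) with shapes following the __getitem__ docstring above:

    from torch.utils.data import DataLoader
    from qai_hub_models.datasets.coco import CocoDataset, collate_fn
    from qai_hub_models.datasets.common import DatasetSplit

    # Downloads up to 500 val-split samples through the FiftyOne zoo on first use.
    dataset = CocoDataset(split=DatasetSplit.VAL, max_boxes=100, num_samples=500)
    loader = DataLoader(dataset, batch_size=8, collate_fn=collate_fn)
    images, (image_ids, heights, widths, boxes, labels, num_boxes) = next(iter(loader))
    # boxes is zero-padded to (batch, max_boxes, 4); num_boxes holds the real count.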