diff --git a/chainercv/datasets/__init__.py b/chainercv/datasets/__init__.py index 6ecea75ca5..cd0f504d70 100644 --- a/chainercv/datasets/__init__.py +++ b/chainercv/datasets/__init__.py @@ -22,6 +22,8 @@ from chainercv.datasets.cub.cub_utils import cub_label_names # NOQA from chainercv.datasets.directory_parsing_label_dataset import directory_parsing_label_names # NOQA from chainercv.datasets.directory_parsing_label_dataset import DirectoryParsingLabelDataset # NOQA +from chainercv.datasets.flic.flic_keypoint_dataset import FLICKeypointDataset # NOQA +from chainercv.datasets.flic.flic_utils import flic_joint_names # NOQA from chainercv.datasets.mixup_soft_label_dataset import MixUpSoftLabelDataset # NOQA from chainercv.datasets.online_products.online_products_dataset import online_products_super_label_names # NOQA from chainercv.datasets.online_products.online_products_dataset import OnlineProductsDataset # NOQA diff --git a/chainercv/datasets/flic/__init__.py b/chainercv/datasets/flic/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/chainercv/datasets/flic/flic_keypoint_dataset.py b/chainercv/datasets/flic/flic_keypoint_dataset.py new file mode 100644 index 0000000000..2d41e3f0e8 --- /dev/null +++ b/chainercv/datasets/flic/flic_keypoint_dataset.py @@ -0,0 +1,138 @@ +import glob +import os + +import numpy as np + +import chainer +from chainercv.datasets.flic import flic_utils +from chainercv import utils + +try: + from scipy.io import loadmat + _scipy_available = True +except (ImportError, TypeError): + _scipy_available = False + + +class FLICKeypointDataset(chainer.dataset.DatasetMixin): + + """`Frames Labeled in Cinema (FLIC)`_ dataset with annotated keypoints. + + .. _`Frames Labeled in Cinema (FLIC)`: + https://bensapp.github.io/flic-dataset.html + + An index corresponds to each image. 
+ + When queried by an index, this dataset returns the corresponding + :obj:`img, keypoint`, which is a tuple of an image and keypoints + that indicate visible keypoints in the image. + The data types of the two elements are :obj:`float32, float32`. + + The keypoints are packed into a two dimensional array of shape + :math:`(K, 2)`, where :math:`K` is the number of keypoints. + Note that :math:`K=29` in FLIC dataset. Also note that not all + keypoints are visible in an image. When a keypoint is not visible, + the values stored for that keypoint are :obj:`~numpy.nan`. The second axis + corresponds to the :math:`y` and :math:`x` coordinates of the + keypoints in the image. + + The torso bounding box is a one-dimensional array of shape :math:`(4,)`. + The elements of the bounding box correspond to + :math:`(y_{min}, x_{min}, y_{max}, x_{max})`, where the four attributes are + coordinates of the top left and the bottom right vertices. + This information can optionally be retrieved from the dataset + by setting :obj:`return_torsobox = True`. + + Args: + data_dir (string): Path to the root of the training data. If this is + :obj:`auto`, this class will automatically download data for you + under :obj:`$CHAINER_DATASET_ROOT/pfnet/chainercv/flic/FLIC-full`. + split ({'train', 'test'}): Select from dataset splits used in + the FLIC dataset. + return_torsobox (bool): If :obj:`True`, this returns a bounding box + around the torso. The default value is :obj:`False`. + use_bad (bool): If :obj:`False`, the data which have :obj:`isbad = 1` + will be ignored. The default is :obj:`False`. + use_unchecked (bool): If :obj:`False`, the data which have + :obj:`isunchecked = 1` will be ignored. The default is + :obj:`False`. 
+ + """ + + def __init__(self, data_dir='auto', split='train', return_torsobox=False, + use_bad=False, use_unchecked=False): + super(FLICKeypointDataset, self).__init__() + if split not in ['train', 'test']: + raise ValueError( + '\'split\' argument should be either \'train\' or \'test\'.') + + if not _scipy_available: + raise ImportError( + 'scipy is needed to extract labels from the .mat file. ' + 'Please install scipy:\n\n' + '\t$pip install scipy\n\n') + + if data_dir == 'auto': + data_dir = flic_utils.get_flic() + + img_paths = {os.path.basename(fn): fn for fn in glob.glob( + os.path.join(data_dir, 'images', '*.jpg'))} + + label_annos = [ + 'poselet_hit_idx', + 'moviename', + 'coords', + 'filepath', + 'imgdims', + 'currframe', + 'torsobox', + 'istrain', + 'istest', + 'isbad', + 'isunchecked', + ] + annos = loadmat(os.path.join(data_dir, 'examples.mat')) + + self.img_paths = list() + self.keypoints = list() + self.torsoboxes = list() + self.return_torsobox = return_torsobox + + for label in annos['examples'][0]: + label = {label_annos[i]: val for i, val in enumerate(label)} + if not use_bad and int(label['isbad']) == 1: + continue + if not use_unchecked and int(label['isunchecked']) == 1: + continue + if ((split == 'train' and int(label['istrain']) == 0) + or (split == 'test' and int(label['istest']) == 0)): + continue + + self.img_paths.append(img_paths[label['filepath'][0]]) + self.keypoints.append(label['coords'].T[:, ::-1]) + if return_torsobox: + self.torsoboxes.append(label['torsobox'][0, [1, 0, 3, 2]]) + + def __len__(self): + return len(self.img_paths) + + def get_example(self, i): + """Returns the i-th example. + + Args: + i (int): The index of the example. + + Returns: + tuple of an image and keypoints. + The image is in CHW format and its color channel is ordered in + RGB. + If :obj:`return_torsobox = True`, + a bounding box is appended to the returned value. 
+ + """ + img = utils.read_image(self.img_paths[i]) + keypoint = np.array(self.keypoints[i], dtype=np.float32) + if self.return_torsobox: + return img, keypoint, self.torsoboxes[i] + else: + return img, keypoint diff --git a/chainercv/datasets/flic/flic_utils.py b/chainercv/datasets/flic/flic_utils.py new file mode 100644 index 0000000000..cdd389cd41 --- /dev/null +++ b/chainercv/datasets/flic/flic_utils.py @@ -0,0 +1,51 @@ +import os + +from chainer.dataset import download +from chainercv import utils + +root = 'pfnet/chainercv/flic' + +url = 'http://vision.grasp.upenn.edu/video/FLIC-full.zip' + +flic_joint_names = [ + 'lsho', + 'lelb', + 'lwri', + 'rsho', + 'relb', + 'rwri', + 'lhip', + 'lkne', + 'lank', + 'rhip', + 'rkne', + 'rank', + 'leye', + 'reye', + 'lear', + 'rear', + 'nose', + 'msho', + 'mhip', + 'mear', + 'mtorso', + 'mluarm', + 'mruarm', + 'mllarm', + 'mrlarm', + 'mluleg', + 'mruleg', + 'mllleg', + 'mrlleg' +] + + +def get_flic(): + data_root = download.get_dataset_directory(root) + dataset_dir = os.path.join(data_root, 'FLIC-full') + if not os.path.exists(dataset_dir): + download_file_path = utils.cached_download(url) + ext = os.path.splitext(url)[1] + utils.extractall(download_file_path, data_root, ext) + + return dataset_dir diff --git a/docs/source/reference/datasets.rst b/docs/source/reference/datasets.rst index 68ac11854b..b523c8d6d4 100644 --- a/docs/source/reference/datasets.rst +++ b/docs/source/reference/datasets.rst @@ -62,6 +62,13 @@ CUBKeypointDataset ~~~~~~~~~~~~~~~~~~ .. autoclass:: CUBKeypointDataset +FLIC +---- + +FLICKeypointDataset +~~~~~~~~~~~~~~~~~~~ +.. autoclass:: FLICKeypointDataset + MS COCO ------- @@ -84,7 +91,6 @@ OnlineProductsDataset ~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: OnlineProductsDataset - PASCAL VOC ----------