# augmentations.py

import math
import tensorflow as tf

from data_utils import exterior_exclusion


def random_rotation(image, max_degrees, bbox=None, prob=0.5):
    """Randomly rotate an image and, optionally, its bounding box.

    With probability `prob`, an angle is drawn uniformly from
    [-max_degrees, max_degrees) and the image is rotated by it using
    bilinear interpolation. When `bbox` is given, the image is first
    translated so the bbox center sits at the image center, rotated, and
    translated back, so the rotation is effectively about the bbox center.
    The bbox is replaced by the axis-aligned box enclosing its rotated
    corners, clipped to the image.

    Args:
        image: Image tensor; its first two dimensions are read as height
            and width.
        max_degrees: Maximum absolute rotation angle, in degrees.
        bbox: Optional [xmin, ymin, xmax, ymax] box in pixel coordinates.
        prob: Probability of applying the rotation.

    Returns:
        The (possibly rotated) image if `bbox` is None, otherwise an
        `(image, bbox)` tuple whose bbox is int32 in both outcomes.
    """
    def _rotate_image(image, radians):
        return tf.contrib.image.rotate(image, radians, interpolation='BILINEAR')

    def _rotation(image, bbox):
        # Get random angle
        degrees = tf.random.uniform([], minval=-max_degrees, maxval=max_degrees, dtype=tf.float32)
        radians = degrees * math.pi / 180.
        if bbox is None:
            return _rotate_image(image, radians)

        # Get offset of the bbox center from the image center
        image_shape = tf.cast(tf.shape(image), tf.float32)
        image_height, image_width = image_shape[0], image_shape[1]
        bbox = tf.cast(bbox, tf.float32)
        trans_x = image_width / 2. - (bbox[0] + bbox[2]) / 2.
        trans_y = image_height / 2. - (bbox[1] + bbox[3]) / 2.

        # Center the bbox, rotate, then undo the centering translation
        image = _translate_image(image, trans_x, trans_y)
        bbox = _translate_bbox(bbox, image_height, image_width, trans_x, trans_y)
        image = _rotate_image(image, radians)
        bbox = _rotate_bbox(bbox, image_height, image_width, radians)
        image = _translate_image(image, -trans_x, -trans_y)
        bbox = _translate_bbox(bbox, image_height, image_width, -trans_x, -trans_y)

        return image, tf.cast(bbox, tf.int32)

    if bbox is None:
        return tf.cond(_should_apply(prob), lambda: _rotation(image, None), lambda: image)

    # tf.cond requires both branches to produce identical dtypes. The applied
    # branch always emits an int32 bbox, so the pass-through branch casts too;
    # otherwise any non-int32 bbox input makes graph construction fail.
    return tf.cond(
        _should_apply(prob),
        lambda: _rotation(image, bbox),
        lambda: (image, tf.cast(bbox, tf.int32)))


def random_bbox_jitter(bbox, image_height, image_width, max_fraction, prob=0.5):
    """Randomly jitter bbox coordinates by +/- max_fraction of the width/height.

    With probability `prob`, each of the four coordinates is shifted
    independently by a uniform offset: x coordinates by up to
    `max_fraction` times the bbox width, y coordinates by up to
    `max_fraction` times the bbox height. The result is clipped to the
    image.

    Args:
        bbox: [xmin, ymin, xmax, ymax] box in pixel coordinates.
        image_height: Image height used as the upper clipping bound for y.
        image_width: Image width used as the upper clipping bound for x.
        max_fraction: Maximum jitter as a fraction of the bbox width/height.
        prob: Probability of applying the jitter.

    Returns:
        An int32 bbox tensor (jittered or unchanged).
    """
    def _bbox_jitter(bbox):
        bbox = tf.cast(bbox, tf.float32)
        width_jitter = max_fraction*(bbox[2] - bbox[0])
        height_jitter = max_fraction*(bbox[3] - bbox[1])
        xmin = bbox[0] + tf.random.uniform([], minval=-width_jitter, maxval=width_jitter, dtype=tf.float32)
        ymin = bbox[1] + tf.random.uniform([], minval=-height_jitter, maxval=height_jitter, dtype=tf.float32)
        xmax = bbox[2] + tf.random.uniform([], minval=-width_jitter, maxval=width_jitter, dtype=tf.float32)
        ymax = bbox[3] + tf.random.uniform([], minval=-height_jitter, maxval=height_jitter, dtype=tf.float32)

        # The corners are jittered independently, so for max_fraction >= 0.5
        # a min coordinate can overshoot its max. Reorder to keep a valid box;
        # this is a no-op whenever the jittered box is already well-formed.
        xmin, xmax = tf.minimum(xmin, xmax), tf.maximum(xmin, xmax)
        ymin, ymax = tf.minimum(ymin, ymax), tf.maximum(ymin, ymax)

        xmin, ymin, xmax, ymax = _clip_bbox(xmin, ymin, xmax, ymax, image_height, image_width)
        return tf.cast(tf.stack([xmin, ymin, xmax, ymax]), tf.int32)

    # tf.cond requires both branches to produce the same dtype; the jittered
    # branch is int32, so the pass-through bbox is cast to int32 as well.
    return tf.cond(_should_apply(prob), lambda: _bbox_jitter(bbox), lambda: tf.cast(bbox, tf.int32))


def random_shift_and_scale(image, max_shift, max_scale_change, prob=0.5):
    """Randomly shift and scale the pixel values of an image.

    With probability `prob`, an integer shift in [-max_shift, max_shift] is
    added to every pixel and the result is multiplied by a scale drawn from
    [1 - max_scale_change, 1 + max_scale_change). Values are then clipped to
    [0, 255] and cast to uint8.

    Args:
        image: Image tensor. NOTE(review): the applied branch returns uint8
            and tf.cond needs matching branch dtypes, so the input is
            presumably expected to be uint8 - confirm against callers.
        max_shift: Maximum absolute intensity shift (integer).
        max_scale_change: Maximum relative deviation of the scale from 1.
        prob: Probability of applying the transformation.

    Returns:
        The transformed (uint8) image, or the input image unchanged.
    """
    def _shift_and_scale(image):
        # Integer tf.random.uniform samples from [minval, maxval), so maxval is
        # max_shift + 1: this keeps the shift range symmetric and avoids the
        # "minval >= maxval" failure when max_shift is 0.
        shift = tf.cast(
            tf.random.uniform([], minval=-max_shift, maxval=max_shift + 1, dtype=tf.int32),
            tf.float32)
        scale = tf.random.uniform([], minval=(1. - max_scale_change),
                                  maxval=(1. + max_scale_change), dtype=tf.float32)
        image = scale*(tf.cast(image, tf.float32) + shift)
        image = tf.cast(tf.clip_by_value(image, 0., 255.), tf.uint8)
        return image

    return tf.cond(_should_apply(prob), lambda: _shift_and_scale(image), lambda: image)


def random_shear(image, max_lambda, bbox=None, prob=0.5):
    """Randomly shear an image (and optionally its bbox) along x or y.

    A shear factor is drawn uniformly from [-max_lambda, max_lambda). With
    probability `prob` the shear is applied, with the direction (x or y)
    chosen with equal probability. When `bbox` is given it is sheared with
    the same factor and replaced by the axis-aligned box enclosing the
    sheared corners, clipped to the image.

    Args:
        image: Image tensor; its first two dimensions are read as height
            and width.
        max_lambda: Maximum absolute shear factor.
        bbox: Optional [xmin, ymin, xmax, ymax] box in pixel coordinates.
        prob: Probability of applying the shear.

    Returns:
        The (possibly sheared) image if `bbox` is None, otherwise an
        `(image, bbox)` tuple whose bbox is int32 in both outcomes.
    """
    shear_lambda = tf.random.uniform([], minval=-max_lambda, maxval=max_lambda, dtype=tf.float32)
    image_shape = tf.cast(tf.shape(image), tf.float32)
    image_height, image_width = image_shape[0], image_shape[1]

    def _shear_along(horizontal):
        # Shear the image (and the bbox, if any) along a single axis
        shear_image = _shear_x_image if horizontal else _shear_y_image
        sheared_image = shear_image(image, shear_lambda)
        if bbox is None:
            return sheared_image
        sheared_bbox = _shear_bbox(bbox, image_height, image_width, shear_lambda, horizontal=horizontal)
        return sheared_image, tf.cast(sheared_bbox, tf.int32)

    def _shear():
        # Pick the x or y direction with equal probability
        return tf.cond(_should_apply(0.5), lambda: _shear_along(True), lambda: _shear_along(False))

    if bbox is None:
        return tf.cond(_should_apply(prob), _shear, lambda: image)

    # tf.cond requires both branches to produce identical dtypes. The sheared
    # bbox is int32, so the pass-through bbox is cast to int32 as well;
    # otherwise any non-int32 bbox input makes graph construction fail.
    return tf.cond(_should_apply(prob), _shear, lambda: (image, tf.cast(bbox, tf.int32)))


def random_exterior_exclusion(image, prob=0.5):
    """Randomly apply `exterior_exclusion` to the image.

    With probability `prob`, the image is passed through the
    `data_utils.exterior_exclusion` Python function (intended to remove
    visual features exterior to the patient's body); otherwise the image
    is returned unchanged.
    """
    def _run_exclusion():
        static_shape = image.get_shape()
        excluded = tf.py_func(exterior_exclusion, [image], tf.uint8)
        # Re-attach the input's static shape to the py_func output
        excluded.set_shape(static_shape)
        return excluded

    return tf.cond(_should_apply(prob), _run_exclusion, lambda: image)


def _translate_image(image, delta_x, delta_y):
    """Shift an image by (delta_x, delta_y) with bilinear interpolation"""
    offsets = [delta_x, delta_y]
    return tf.contrib.image.translate(image, offsets, interpolation='BILINEAR')


def _translate_bbox(bbox, image_height, image_width, delta_x, delta_y):
    """Shift a bbox by (delta_x, delta_y) and clip it to the image bounds"""
    offsets = tf.stack([delta_x, delta_y, delta_x, delta_y])
    shifted = bbox + offsets
    clipped = _clip_bbox(
        shifted[0], shifted[1], shifted[2], shifted[3], image_height, image_width)
    return tf.stack(list(clipped))


def _rotate_bbox(bbox, image_height, image_width, radians):
    """Rotate a bbox about its own center and return the enclosing box.

    The four corners are rotated by `radians` about the bbox center; the
    result is the axis-aligned box spanning the rotated corners, clipped to
    the image bounds.
    """
    left, top, right, bottom = bbox[0], bbox[1], bbox[2], bbox[3]
    mid_x = (left + right) / 2.
    mid_y = (top + bottom) / 2.

    # Express the corners relative to the bbox center
    rel_left, rel_right = left - mid_x, right - mid_x
    rel_top, rel_bottom = top - mid_y, bottom - mid_y
    corners = tf.stack([
        [rel_left, rel_top],
        [rel_right, rel_top],
        [rel_left, rel_bottom],
        [rel_right, rel_bottom]])
    corners = tf.transpose(tf.cast(corners, tf.float32))  # rows: x values, y values

    # Negate the angle since the image y-axis is flipped
    theta = -radians
    cos_t, sin_t = tf.cos(theta), tf.sin(theta)
    rotation = tf.stack([[cos_t, -sin_t], [sin_t, cos_t]])
    rotated = tf.matmul(rotation, corners)
    xs, ys = rotated[0, :], rotated[1, :]

    # Enclosing box of the rotated corners, moved back and clipped to the image
    clipped = _clip_bbox(
        tf.reduce_min(xs) + mid_x,
        tf.reduce_min(ys) + mid_y,
        tf.reduce_max(xs) + mid_x,
        tf.reduce_max(ys) + mid_y,
        image_height, image_width)
    return tf.stack(list(clipped))


def _shear_x_image(image, shear_lambda):
    """Shear an image along the x-axis with bilinear interpolation"""
    # Projective transform vector [a0, a1, a2, b0, b1, b2, c0, c1]; only a1
    # deviates from the identity, so x is offset in proportion to y.
    projective = tf.stack([1., shear_lambda, 0., 0., 1., 0., 0., 0.])
    return tf.contrib.image.transform(image, projective, interpolation='BILINEAR')


def _shear_y_image(image, shear_lambda):
    """Shear an image along the y-axis with bilinear interpolation"""
    # Projective transform vector [a0, a1, a2, b0, b1, b2, c0, c1]; only b0
    # deviates from the identity, so y is offset in proportion to x.
    projective = tf.stack([1., 0., 0., shear_lambda, 1., 0., 0., 0.])
    return tf.contrib.image.transform(image, projective, interpolation='BILINEAR')


def _shear_bbox(bbox, image_height, image_width, shear_lambda, horizontal=True):
    """Shear a bbox along x (horizontal=True) or y and return the enclosing box.

    The four corners are multiplied by the shear matrix; the result is the
    axis-aligned box spanning the sheared corners, clipped to the image.
    """
    left, top, right, bottom = bbox[0], bbox[1], bbox[2], bbox[3]
    corners = tf.stack([[left, top], [right, top], [left, bottom], [right, bottom]])
    corners = tf.transpose(tf.cast(corners, tf.float32))  # rows: x values, y values

    # Select the 2x2 shear matrix for the requested direction
    if horizontal:
        rows = [[1., -shear_lambda], [0., 1.]]
    else:
        rows = [[1., 0.], [-shear_lambda, 1.]]
    sheared = tf.matmul(tf.stack(rows), corners)
    xs, ys = sheared[0, :], sheared[1, :]

    # Enclosing box of the sheared corners, clipped to the image
    clipped = _clip_bbox(
        tf.reduce_min(xs), tf.reduce_min(ys),
        tf.reduce_max(xs), tf.reduce_max(ys),
        image_height, image_width)
    return tf.stack(list(clipped))


def _clip_bbox(xmin, ymin, xmax, ymax, image_height, image_width):
    """Clip bbox coordinates to valid image coordinates.

    x coordinates are clipped to [0, image_width] and y coordinates to
    [0, image_height]. The bounds are cast to the dtype of the coordinate
    being clipped, so e.g. integer image dimensions can be combined with
    float coordinates.

    Returns:
        Tuple `(xmin, ymin, xmax, ymax)` of clipped coordinates, each in the
        dtype of the corresponding input coordinate.
    """
    def _clip(coord, upper):
        coord = tf.convert_to_tensor(coord)
        # tf.clip_by_value needs the bounds in the same dtype as the value;
        # the cast is a no-op when the dtypes already agree.
        return tf.clip_by_value(coord, 0, tf.cast(upper, coord.dtype))

    return (
        _clip(xmin, image_width),
        _clip(ymin, image_height),
        _clip(xmax, image_width),
        _clip(ymax, image_height),
    )


def _should_apply(prob):
    """Create a scalar bool tensor that is True with probability `prob`.

    A sample u is drawn uniformly from [0, 1) and the result is
    `u + prob >= 1`. A `prob` of 0 or less never fires and a `prob` of 1 or
    more always fires.
    """
    sample = tf.random.uniform([], dtype=tf.float32)
    # For prob in [0, 1] this equals casting floor(sample + prob) to bool.
    # The explicit comparison also handles prob < 0 correctly: floor() would
    # yield -1 there, which casts to True.
    return tf.greater_equal(sample + prob, 1.)