diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a7e36cf --- /dev/null +++ b/Makefile @@ -0,0 +1,10 @@ +.PHONY: clean flake8 + +clean: + find . -name "*.so" -exec rm -rf {} \; + find . -name "*.pyc" -exec rm -rf {} \; + find . -depth -name "__pycache__" -type d -exec rm -rf '{}' \; + rm -rf build/ dist/ *.egg-info/ + +flake8: + flake8 --exclude "test_*" --max-line-length=100 --count --statistics --exit-zero spindle_tracker/ diff --git a/README.md b/README.md index d2e4c6f..52b4d11 100644 --- a/README.md +++ b/README.md @@ -2,3 +2,4 @@ This software is used to track mitotic spindle components in fluorescent microscopy images. It is used for daily research tasks. +test diff --git a/doc/README.md b/doc/README.md new file mode 100644 index 0000000..a605f0e --- /dev/null +++ b/doc/README.md @@ -0,0 +1,3 @@ +# Contrib files + +Put here all files which can help during `sktracker` development. diff --git a/doc/class_diagram.png b/doc/class_diagram.png new file mode 100644 index 0000000..992bf91 Binary files /dev/null and b/doc/class_diagram.png differ diff --git a/doc/class_diagram.svg b/doc/class_diagram.svg new file mode 100644 index 0000000..e6a8c66 --- /dev/null +++ b/doc/class_diagram.svg @@ -0,0 +1,688 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + CostMatrix + + + + AbstractCostFunction + + Solver + + + ByFrameSolver + + GapCloseSolver + + Tracker + + + AbstractLinkCostFunction + + AbstractDiagCostFunction + + DiagCostFunction + + BrownianCostFunction + + DirectedCostFunction + + + + + + + Block + + LinkBlock + + DiagBlock + + + + + + GapCloseMergeSplitSolver + + + + diff --git a/doc/lap_view.png b/doc/lap_view.png new file mode 100644 index 0000000..0c6cb86 Binary files /dev/null and b/doc/lap_view.png differ diff --git a/doc/lap_view.svg b/doc/lap_view.svg new file mode 100644 index 0000000..0b34a7c --- /dev/null +++ b/doc/lap_view.svg @@ -0,0 +1,684 @@ + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + LBR + + + + + Death + Birth + Link + + Vin = [Vin1, Vin2] + Vin + + Vout + Vout = [Vout1, Vout2] + Linkin, out = [C11(Vin1, Vout1), C12(Vin1, Vout2), C21(Vin2, Vout1), C22(Vin2, Vout2)] + Birthout = [C(Vout1), C(Vout2)] + Deathin = [C(Vin1), C(Vin2)] + + + + + + LBR + + + Death + Birth + Link + + Vin = [Vin1] + Vin + + Vout + Vout = [Vout1] + Linkin, out = [C11(Vin1, Vout1)] + Birthout = [C(Vout1)] + Deathin = [C(Vin1)] + + diff --git a/doc/repo_diagram.png b/doc/repo_diagram.png new file mode 100644 index 0000000..dfe517c Binary files /dev/null and b/doc/repo_diagram.png differ diff --git a/doc/repo_diagram.svg b/doc/repo_diagram.svg new file mode 100644 index 0000000..0cae33e --- /dev/null +++ b/doc/repo_diagram.svg @@ -0,0 +1,1128 @@ + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + sktracker + + + + utils + + + + tracker.py + + + + tracker + + + + matrix + + + + link_matrix.py + + + + + diagonal_matrix.py + + + + + + birth_matrix.py + + + + + detection + + + + peak_detector.py + + + + io + + + + tifffile.py + + + + ome.py + + + + metadataio.py + + + + + death_matrix.py + + + + + cost_function + + + + + distance_function.py + + + + + + distance_and_velocity_function.py + + + + + solver + + + + + assignment_solver.py + + + + + + gc_solver.py + + + + + + gc_merge_split_solver.py + + + + + + + + + + + + + + + + + Done + + + + + + + + + + doc + + + + Tests dir in each sktracker subdirectories + + + data_generator.py + + + Done + + + stackio.py + + + + + objectsio.py + + + + + + diff --git a/spindle_tracker/data_selector/load_trackers.py b/spindle_tracker/data_selector/load_trackers.py index c249fc1..9ecc788 100644 --- a/spindle_tracker/data_selector/load_trackers.py +++ b/spindle_tracker/data_selector/load_trackers.py @@ -3,10 +3,10 @@ import re import logging -log = logging.getLogger(__name__) +from spindle_tracker.utils.sort import natural_keys +from spindle_tracker.utils import print_progress -from sktracker.utils.sort import natural_keys -from sktracker.utils import print_progress +log = logging.getLogger(__name__) def tracker_load(base_dir, movies_path, patterns, tracker_class=None, tracker_params={}): diff --git a/spindle_tracker/detector/__init__.py b/spindle_tracker/detector/__init__.py index 537c244..fe9281e 100644 --- a/spindle_tracker/detector/__init__.py +++ b/spindle_tracker/detector/__init__.py @@ -1 +1,2 @@ from .log_detector import log_detector +from .peak_detector import peak_detector diff --git a/spindle_tracker/io/objectsio.py b/spindle_tracker/io/objectsio.py index 4ceb8ae..fb7e45b 100644 --- a/spindle_tracker/io/objectsio.py +++ b/spindle_tracker/io/objectsio.py @@ -77,11 +77,11 @@ def __init__(self, metadata=None, @classmethod def from_stackio(cls, stackio): - """Loads metadata from :class:`sktracker.io.stackio` + """Loads metadata from :class:`spindle_tracker.io.stackio` Parameters ---------- - stackio : :class:`sktracker.io.StackIO` + stackio : :class:`spindle_tracker.io.StackIO` """ return cls(metadata=stackio.metadata) diff --git a/spindle_tracker/io/stackio.py b/spindle_tracker/io/stackio.py index e33068d..c424a97 100644 --- a/spindle_tracker/io/stackio.py +++ b/spindle_tracker/io/stackio.py @@ -1,13 +1,3 @@ - -# -*- coding: utf-8 -*- - - -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function - - import sys import os import logging @@ -85,11 +75,11 @@ def image_path(self): @classmethod def from_objectsio(cls, objectsio): - """Load images and metadata from :class:`sktracker.io.ObjectsIO` + """Load images and metadata from :class:`spindle_tracker.io.ObjectsIO` Parameters ---------- - objectsio : :class:`sktracker.io.ObjectsIO` + objectsio : :class:`spindle_tracker.io.ObjectsIO` """ return cls(image_path=objectsio.image_path, metadata=objectsio.metadata) @@ -99,7 +89,7 @@ def get_tif(self, multifile=True): Returns ------- - tf : :class:`sktracker.io.TiffFile` + tf : :class:`spindle_tracker.io.TiffFile` """ tf = TiffFile(self.image_path, multifile=multifile) @@ -110,7 +100,7 @@ def get_tif_from_list(self, index=0): Returns ------- - tf : :class:`sktracker.io.TiffFile` + tf : :class:`spindle_tracker.io.TiffFile` """ diff --git a/spindle_tracker/tracker/__init__.py b/spindle_tracker/tracker/__init__.py new file mode 100644 index 0000000..39dbbc9 --- /dev/null +++ b/spindle_tracker/tracker/__init__.py @@ -0,0 +1,21 @@ + +# -*- coding: utf-8 -*- + + +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + + +from . import cost_function +from . import lapjv +from . import matrix +from . import solver +from . import utils + +__all__ = ["cost_function", + "lapjv", + "matrix", + "solver", + "utils"] diff --git a/spindle_tracker/tracker/cost_function/__init__.py b/spindle_tracker/tracker/cost_function/__init__.py new file mode 100644 index 0000000..1d3bdf5 --- /dev/null +++ b/spindle_tracker/tracker/cost_function/__init__.py @@ -0,0 +1,22 @@ + +# -*- coding: utf-8 -*- + + +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + + +from .abstract_cost_functions import AbstractCostFunction + +from . import brownian +from . import diagonal +from . import directed +from . import gap_close + +__all__ = ["AbstractCostFunction", + "brownian", + "diagonal", + "directed", + "gap_close"] diff --git a/spindle_tracker/tracker/cost_function/abstract_cost_functions.py b/spindle_tracker/tracker/cost_function/abstract_cost_functions.py new file mode 100644 index 0000000..32f4f4f --- /dev/null +++ b/spindle_tracker/tracker/cost_function/abstract_cost_functions.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- + +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + +import pandas as pd + +__all__ = [] + + +class AbstractCostFunction(object): + """Abstract class + + Parameters + ---------- + context : dict + parameters : dict + """ + + def __init__(self, context, parameters): + self.context = context + self.parameters = parameters + + def get_block(self, *args, **kwargs): + """This method will update the values in `self.mat`. It should be overwritten for any matrix + verification returned by self.build. + + + """ + self.mat = self._build(*args, **kwargs) + + def _build(self): + """This method needs to be overwritten by subclasses + """ + return None + + def check_columns(self, objects, cols): + """Check pandas.DataFrame column names. + + Parameters + ---------- + objects : list of :class:`pandas.DataFrame` or :class:`pandas.DataFrame` + cols : list column names to check + """ + + if isinstance(objects, pd.DataFrame): + objects = [objects] + + cols_set = set(cols) + for obj in objects: + actual_cols_set = set(obj.columns.values) + if not cols_set.issubset(actual_cols_set): + raise ValueError("The passed dataframe doesn't" + " contain the required columns." + "Missing columns: {}".format( + cols_set.difference(actual_cols_set))) + + def check_context(self, key, obj_type): + """Check wether self.context contain a key on a specific type. + + Parameters + ---------- + key : str + Key to find in self.context. + obj_type : class name + To check context value type. + + Returns + ------- + The desired key's value in context. + """ + + message = "Context {} does not contain required key : {}" + if key not in self.context.keys(): + raise ValueError(message.format(self.context, key)) + + message = "Context value {} does not have valid key type : {}" + if not isinstance(self.context[key], obj_type): + raise TypeError(message.format(self.context[key], obj_type)) + + return self.context[key] diff --git a/spindle_tracker/tracker/cost_function/brownian.py b/spindle_tracker/tracker/cost_function/brownian.py new file mode 100644 index 0000000..1a36554 --- /dev/null +++ b/spindle_tracker/tracker/cost_function/brownian.py @@ -0,0 +1,165 @@ + +# -*- coding: utf-8 -*- + + +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + + +import numpy as np +import pandas as pd +from scipy.spatial.distance import cdist + +from . import AbstractCostFunction +from .gap_close import AbstractGapCloseCostFunction +from ...trajectories import Trajectories + +__all__ = ["BrownianLinkCostFunction", "BrownianGapCloseCostFunction"] + + +class BrownianLinkCostFunction(AbstractCostFunction): + """This class generates cost matrices for brownian motion + trajectories. + + The cost between two position is given by the square of their + distance + + Attributes + ---------- + + parameters: dict + Used by the `build` method, with the following keys: + + - 'distance_metric': a string, default 'euclidean', + passed to `scipy.spatial.distance.cdist` + (see this function documentation for more) + + - 'coords': a list of column names on which to compute the distance, + default ['x', 'y', 'z'] + + - 'max_speed': a float, default 1. All the values of the cost matrix + for which the distance *divided by the time difference* is higher than + this parameter's value are set to np.nan + + context: dict + Context is used to store vectors. + + - pos_in: :class:`pandas.DataFrame` + The object coordinates to link from + + - pos_out: :class:`pandas.DataFrame` + The object coordinates to link to + + """ + + def __init__(self, parameters): + """ + """ + + _parameters = {'distance_metric': 'euclidean', + 'max_speed': 1., + 'coords': ['x', 'y', 'z']} + _parameters.update(parameters) + + super(BrownianLinkCostFunction, self).__init__(context={}, parameters=_parameters) + + def _build(self): + """ + """ + + # Get parameters + coords = self.parameters['coords'] + distance_metric = self.parameters['distance_metric'] + max_speed = self.parameters['max_speed'] + + # Check context + pos_in = self.check_context('pos_in', pd.DataFrame) + pos_out = self.check_context('pos_out', pd.DataFrame) + + # Chech vectors + self.check_columns([pos_in, pos_out], list(coords) + ['t']) + + if pos_out.empty or pos_in.empty: + return pd.DataFrame([]) + + dt = pos_out['t'].iloc[0] - pos_in['t'].iloc[0] + + # Build matrix block + distances = cdist(pos_in[coords].astype(np.float), + pos_out[coords].astype(np.float), + metric=distance_metric) + + distances /= np.abs(dt) + distances[distances > max_speed] = np.nan + distances = distances ** 2 + + return distances + + +class BrownianGapCloseCostFunction(AbstractGapCloseCostFunction): + """ + """ + + def __init__(self, parameters): + """ + """ + _parameters = {'distance_metric': 'euclidean', + 'max_speed': 1., + 'coords': ['x', 'y', 'z']} + _parameters.update(parameters) + + super(self.__class__, self).__init__(context={}, parameters=_parameters) + + def _build(self,): + """ + """ + + self.check_idxs_length() + + # Get parameters + coords = self.parameters['coords'] + distance_metric = self.parameters['distance_metric'] + + if distance_metric != 'euclidean': + raise Exception("Only 'euclidean' distance are supported for now.") + + max_speed = self.parameters['max_speed'] + + # Check context + idxs_in = self.check_context('idxs_in', list) + idxs_out = self.check_context('idxs_out', list) + trajs = self.check_context('trajs', Trajectories) + + # Just in case the parent didn't do it + trajs.relabel_fromzero('label', inplace=True) + + # Init 2d distances array + mat = np.empty((len(trajs.labels), + len(trajs.labels))) + mat.fill(np.nan) + + # Compute distance between all_pos_out and all_pos_in + all_pos_in = trajs.loc[idxs_in] + all_pos_out = trajs.loc[idxs_out] + vecs = [(all_pos_in[c].values - all_pos_out[c].values) ** 2 for c in coords] + all_dist = np.sqrt(np.sum(vecs, axis=0)) + + # Get all dt + all_dt = np.abs(all_pos_in['t'].values - all_pos_out['t'].values) + + # Compute speeds + speeds = all_dist / all_dt + + # Remove speeds greater than 'max_speed' + speeds[speeds > max_speed] = np.nan + + # Fill 2d distances array + i_in = np.array(idxs_in)[:, 1].astype(int) + i_out = np.array(idxs_out)[:, 1].astype(int) + mat[i_in, i_out] = speeds + + mat = mat ** 2 + + return mat diff --git a/spindle_tracker/tracker/cost_function/diagonal.py b/spindle_tracker/tracker/cost_function/diagonal.py new file mode 100644 index 0000000..c2ccba1 --- /dev/null +++ b/spindle_tracker/tracker/cost_function/diagonal.py @@ -0,0 +1,76 @@ + +# -*- coding: utf-8 -*- + + +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + + +import numpy as np +from . import AbstractCostFunction + +__all__ = ["DiagonalCostFunction"] + + +class DiagonalCostFunction(AbstractCostFunction): + """Basic cost function for a diagonal block. + + Parameters + ---------- + context: `dict` + this dictionnary must contain at least a `"cost"` key + parameters: `dict` + + Attributes + ---------- + + context: dict + Context need to be updated at each `_build` call + + - cost: float + Default cost + + - objects: + Vector used to build block matrix + + """ + + def __init__(self, context, parameters): + """ + """ + + super(self.__class__, self).__init__(context=context, parameters=parameters) + self.check_context('cost', float) + + def _build(self): + """ + """ + objects = self.check_context('objects', object) + cost = self.check_context('cost', float) + + vect = np.ones(len(objects)) * cost + mat = self._vector_to_matrix(vect) + + return mat + + def _vector_to_matrix(self, vector): + """Converts a 1D :class:`numpy.ndarray` to identity 2D :class:`numpy.ndarray` + with NaNs outside of the diagonal + + Parameters + ---------- + vector: 1D :class:`numpy.ndarray` + + Returns + ------- + mat: 2D :class:`numpy.ndarray` + """ + + size = vector.shape[0] + mat = np.empty((size, size)) + mat[:] = np.nan + mat[np.diag_indices(size)] = vector + + return mat diff --git a/spindle_tracker/tracker/cost_function/directed.py b/spindle_tracker/tracker/cost_function/directed.py new file mode 100644 index 0000000..cbd54e5 --- /dev/null +++ b/spindle_tracker/tracker/cost_function/directed.py @@ -0,0 +1,149 @@ + +# -*- coding: utf-8 -*- + + +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + + +import numpy as np +import pandas as pd + +from scipy import interpolate + +from . import AbstractCostFunction + +__all__ = ["BasicDirectedLinkCostFunction"] + + +class BasicDirectedLinkCostFunction(AbstractCostFunction): + """This class generates cost matrices for directed motion + trajectories. + + The score for each situations is given by the cosine similarity between a guessed trajectory + vector and the vector made by two points. + + Attributes + ---------- + + paramters: dict + Used by the `build` method, with the following keys: + + - 'distance_metric': a string, default 'euclidean', + passed to `scipy.spatial.distance.cdist` + (see this function documentation for more) + + - 'coords': a list of column names on which to compute the distance, + default ['x', 'y', 'z'] + + - 'max_speed': a float, default 1. All the values of the cost matrix + for which the distance *divided by the time difference* is higher than + this parameter's value are set to np.nan + + context: dict + Context is used to store vectors. + + - pos_in: :class:`pandas.DataFrame` + The object coordinates to link from + + - pos_out: :class:`pandas.DataFrame` + The object coordinates to link to + + """ + + def __init__(self, parameters, context={}): + + _parameters = {'max_speed': 1., + 'past_traj_time': 1, + 'smooth_factor': 0, + 'interpolation_order': 1, + 'coords': ['x', 'y', 'z']} + _parameters.update(parameters) + + super(self.__class__, self).__init__(context=context, parameters=_parameters) + + def _build(self): + """ + """ + + # Get parameters + coords = self.parameters['coords'] + max_speed = self.parameters['max_speed'] + past_traj_time = self.parameters['past_traj_time'] + smooth_factor = self.parameters['smooth_factor'] + interpolation_order = self.parameters['interpolation_order'] + + # Check context + pos_in = self.check_context('pos_in', pd.DataFrame) + pos_out = self.check_context('pos_out', pd.DataFrame) + trajs = self.check_context('trajs', pd.DataFrame) + + # Chech vectors + self.check_columns([pos_in, pos_out], list(coords) + ['t']) + + t_in = pos_in['t'].iloc[0] + t_out = pos_out['t'].iloc[0] + dt = t_out - t_in + + # Select trajectory from (current_time - past_traj_time) and current_time + last_past_time = t_in - (past_traj_time) + past_trajs = trajs[(trajs.t <= t_in) & (trajs.t > last_past_time)] + past_trajs_grouped = past_trajs.groupby(level='label').groups + past_trajs_grouped + + # Compute past trajectories vector + vecs_speed_in = {} + + for label, index in past_trajs_grouped.items(): + past_traj = past_trajs.loc[index] + + vec_speed_in = {} + for coord in coords: + + if past_traj.shape[0] < 4: + # Not enough timepoint to interpolate + vec_speed_in[coord] = np.nan + else: + # Compute the derivative using B-Spline interpolation at time point = t_in + tck = interpolate.splrep(past_traj.t.values, + past_traj[coord].values, + s=smooth_factor, + k=interpolation_order) + splines = interpolate.splev(t_in, tck, der=1) + vec_speed_in[coord] = splines + + vecs_speed_in[label] = vec_speed_in + + vecs_speed_in = pd.DataFrame.from_dict(vecs_speed_in).T.astype(np.float) + + # Compute the matrix according to euclidean distance and angle between vectors + distances = np.empty((pos_in.shape[0], pos_out.shape[0])) + distances.fill(np.nan) + + for i, idx_in in enumerate(pos_in.index): + + vec_speed_in = vecs_speed_in.loc[idx_in] + r_in = pos_in.loc[idx_in] + + for j, idx_out in enumerate(pos_out.index): + + r_out = pos_out.loc[idx_out] + + vec_speed_out = (r_out[coords] - r_in[coords]) / np.abs(dt) + current_speed = np.linalg.norm(vec_speed_out) + + if np.isnan(vec_speed_in).all(): + score = current_speed + elif current_speed > max_speed: + score = np.nan + else: + score = np.dot(vec_speed_in, vec_speed_out) + score /= np.linalg.norm(vec_speed_in) * np.linalg.norm(vec_speed_out) + score = ((score * -1) + 1) * 10 / 2 + score = score + + distances[i, j] = score + + return distances diff --git a/spindle_tracker/tracker/cost_function/gap_close.py b/spindle_tracker/tracker/cost_function/gap_close.py new file mode 100644 index 0000000..28324c2 --- /dev/null +++ b/spindle_tracker/tracker/cost_function/gap_close.py @@ -0,0 +1,33 @@ + +# -*- coding: utf-8 -*- + + +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + +from . import AbstractCostFunction + +__all__ = ["AbstractGapCloseCostFunction"] + + +class AbstractGapCloseCostFunction(AbstractCostFunction): + """ + """ + + def __init__(self, context, parameters): + """ + """ + super(AbstractGapCloseCostFunction, self).__init__(context=context, parameters=parameters) + + def check_idxs_length(self): + """Check wether idxs_in and idxs_out have the same length. + """ + + idxs_in = self.check_context('idxs_in', list) + idxs_out = self.check_context('idxs_out', list) + + if not len(idxs_in) == len(idxs_out): + raise ValueError('''self.context['idxs_in'] and self.context['idxs_out'] + must have the same length ''') diff --git a/spindle_tracker/tracker/lapjv/__init__.py b/spindle_tracker/tracker/lapjv/__init__.py new file mode 100644 index 0000000..efb03e1 --- /dev/null +++ b/spindle_tracker/tracker/lapjv/__init__.py @@ -0,0 +1,13 @@ + +# -*- coding: utf-8 -*- + + +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + + +from .lapjv import lapjv + +__all__ = ['lapjv'] diff --git a/spindle_tracker/tracker/lapjv/_lapjv.pyx b/spindle_tracker/tracker/lapjv/_lapjv.pyx new file mode 100644 index 0000000..ea84ac6 --- /dev/null +++ b/spindle_tracker/tracker/lapjv/_lapjv.pyx @@ -0,0 +1,529 @@ +'''_lapjv.pyx - Jonker-Volgenant algorithm for linear assignment problem. + +Supplementary routines for lapjv.py + +CellProfiler is distributed under the GNU General Public License, +but this file is licensed under the more permissive BSD license. +See the accompanying file LICENSE for details. + +Copyright (c) 2003-2009 Massachusetts Institute of Technology +Copyright (c) 2009-2013 Broad Institute +All rights reserved. +''' + +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + +import numpy as np +cimport numpy as np +cimport cython + +cdef extern from "Python.h": + ctypedef int Py_intptr_t + +cdef extern from "numpy/arrayobject.h": + ctypedef class numpy.ndarray [object PyArrayObject]: + cdef char *data + cdef int nd + cdef Py_intptr_t *dimensions + cdef Py_intptr_t *strides + cdef void import_array() + cdef int PyArray_ITEMSIZE(np.ndarray) + cdef void * PyArray_DATA(np.ndarray) + +import_array() + +__eps = np.sqrt(np.finfo(np.float64).eps) + +def reduction_transfer( + np.ndarray[dtype=np.uint32_t, ndim=1, + negative_indices=False, mode='c'] ii, + np.ndarray[dtype=np.uint32_t, ndim=1, + negative_indices=False, mode='c'] j, + np.ndarray[dtype=np.uint32_t, ndim=1, + negative_indices=False, mode='c'] idx, + np.ndarray[dtype=np.uint32_t, ndim=1, + negative_indices=False, mode='c'] count, + np.ndarray[dtype=np.uint32_t, ndim=1, + negative_indices=False, mode='c'] x, + np.ndarray[dtype=np.float64_t, ndim=1, + negative_indices=False, mode='c'] u, + np.ndarray[dtype=np.float64_t, ndim=1, + negative_indices=False, mode='c'] v, + np.ndarray[dtype=np.float64_t, ndim=1, + negative_indices=False, mode='c'] c): + '''Perform the reduction transfer step from the Jonker-Volgenant algorithm + + The data is input in a ragged array in terms of "i" structured as a + vector of values for each i,j combination where: + + ii - the i to be reduced + j - the j-index of every entry + idx - the index of the first entry for each i + count - the number of entries for each i + x - the assignment of j to i + u - the dual variable "u" which will be updated. It should be + initialized to zero for the first reduction transfer. + v - the dual variable "v" which will be reduced in-place + c - the cost for each entry. + + The code described in the paper is: + + for each assigned row i do + begin + j1:=x[i]; u=min {c[i,j]-v[j] | j=1..n, j != j1}; + v[j1]:=v[j1]-(u-u[i]); + u[i] = u; + end; + + The authors note that reduction transfer can be applied in later stages + of the algorithm but does not seem to provide a substantial benefit + in speed. + ''' + cdef: + int i + int n_i = ii.shape[0] + int iii + int j1 + int j_idx + int j_temp + int j_count + double min_u + double u_temp + int j_at_min + double inf = np.inf + int *p_i = (ii.data) + int *p_j = (j.data) + int *p_idx = (idx.data) + int *p_count = (count.data) + int *p_x = (x.data) + double *p_u = (u.data) + double *v_base = (v.data) + double *c_base = (c.data) + double *p_c + + with nogil: + for iii from 0 <= iii < n_i: + i = p_i[iii] + min_u = inf + j1 = p_x[i] + j_count = p_count[i] + p_c = c_base + p_idx[i] + j_at_min = -1 + for j_idx from 0 <= j_idx < j_count: + j_temp = p_j[j_idx] + if j_temp != j1: + u_temp = p_c[j_idx] - v_base[j_temp] + if u_temp < min_u: + min_u = u_temp + j_at_min = j_temp + if j_at_min != -1: + v_base[j1] -= min_u - p_u[i] + p_u[i] = min_u + +def augmenting_row_reduction( + int n, + np.ndarray[dtype=np.uint32_t, ndim=1, + negative_indices=False, mode='c'] ii, + np.ndarray[dtype=np.uint32_t, ndim=1, + negative_indices=False, mode='c'] jj, + np.ndarray[dtype=np.uint32_t, ndim=1, + negative_indices=False, mode='c'] idx, + np.ndarray[dtype=np.uint32_t, ndim=1, + negative_indices=False, mode='c'] count, + np.ndarray[dtype=np.uint32_t, ndim=1, + negative_indices=False, mode='c'] x, + np.ndarray[dtype=np.uint32_t, ndim=1, + negative_indices=False, mode='c'] y, + np.ndarray[dtype=np.float64_t, ndim=1, + negative_indices=False, mode='c'] u, + np.ndarray[dtype=np.float64_t, ndim=1, + negative_indices=False, mode='c'] v, + np.ndarray[dtype=np.float64_t, ndim=1, + negative_indices=False, mode='c'] c): + '''Perform the augmenting row reduction step from the + Jonker-Volgenaut algorithm + + n - the number of i and j in the linear assignment problem + ii - the unassigned i + jj - the j-index of every entry in c + idx - the index of the first entry for each i + count - the number of entries for each i + x - the assignment of j to i + y - the assignment of i to j + u - the dual variable "u" which will be updated. It should be + initialized to zero for the first reduction transfer. + v - the dual variable "v" which will be reduced in-place + c - the cost for each entry. + + returns a numpy array of the new free choices. + + ''' + free = np.ascontiguousarray(np.zeros(np.max(y)+1, np.uint32), np.uint32) + cdef: + int *p_i = (ii.data) + int *p_free = (PyArray_DATA(free)) + int nfree = 0 + int i + int j + int iii + int jjj + int n_i = ii.shape[0] + int n_j + int *p_j_base = (jj.data) + int *p_idx_base = (idx.data) + int *p_count_base = (count.data) + int *p_x_base = (x.data) + int *p_y_base = (y.data) + double *p_u_base = (u.data) + double *p_v_base = (v.data) + double *p_c_base = (c.data) + double *p_c + double inf = np.inf + int *p_j + double u1 + double u2 + double temp + double eps = __eps + int i1 + int j1 + int j2 + int k + + ####################################### + # + # From Jonker: + # + # procedure AUGMENTING ROW REDUCTION; + # begin + # LIST: = {all unassigned rows}; + # for all i in LIST do + # repeat + # ul:=min {c[i,j]-v[j] for j=l ...n}; + # select j1 with c [i,j 1] - v[j 1] = u1; + # u2:=min {c[i,j]-v[j] for j=l ...n,j< >jl} ; + # select j2 with c [i,j2] - v [j2] = u2 and j2 < >j 1 ; + # u[i]:=u2; + # if ul 0 then x [k]:=0; x[i]:=jl; y [ j l ] : = i ; i:=k + # until ul =u2 (* no reduction transfer *) or k=0 i~* augmentation *) + # end + + with nogil: + k = 0 + while k < n_i: + i = p_i[k] + k += 1 + n_j = p_count_base[i] + p_j = p_j_base + p_idx_base[i] + p_c = p_c_base + p_idx_base[i] + u1 = inf + u2 = inf + # Find u1 and u2 + for jjj from 0 <= jjj < n_j: + j = p_j[jjj] + temp = p_c[jjj] - p_v_base[j] + if temp < u1: + u2 = u1 + j2 = j1 + u1 = temp + j1 = j + elif temp < u2: + u2 = temp + j2 = j + # perform the reduction + i1 = p_y_base[j1] + if u1 + eps < u2: + p_v_base[j1] = p_v_base[j1] - u2 + u1 + elif i1 != n: + j1 = j2 + i1 = p_y_base[j1] + if i1 != n: + if u1 + eps < u2: + k -= 1 + p_i[k] = i1 + else: + p_free[nfree] = i1 + nfree += 1 + p_x_base[i] = j1 + p_y_base[j1] = i + free.resize(nfree) + return free + +def augment( + int n, + np.ndarray[dtype=np.uint32_t, ndim=1, + negative_indices=False, mode='c'] ii, + np.ndarray[dtype=np.uint32_t, ndim=1, + negative_indices=False, mode='c'] jj, + np.ndarray[dtype=np.uint32_t, ndim=1, + negative_indices=False, mode='c'] idx, + np.ndarray[dtype=np.uint32_t, ndim=1, + negative_indices=False, mode='c'] count, + np.ndarray[dtype=np.uint32_t, ndim=1, + negative_indices=False, mode='c'] x, + np.ndarray[dtype=np.uint32_t, ndim=1, + negative_indices=False, mode='c'] y, + np.ndarray[dtype=np.float64_t, ndim=1, + negative_indices=False, mode='c'] u, + np.ndarray[dtype=np.float64_t, ndim=1, + negative_indices=False, mode='c'] v, + np.ndarray[dtype=np.float64_t, ndim=1, + negative_indices=False, mode='c'] c): + '''Perform the augmentation step to assign unassigned i and j + + n - the # of i and j, also the marker of unassigned x and y + ii - the unassigned i + jj - the ragged arrays of j for each i + idx - the index of the first j for each i + count - the number of j for each i + x - the assignments of j for each i + y - the assignments of i for each j + u,v - the dual variables + c - the costs + ''' + # + # Holder for c[i,j] - v[j] + # + d = np.ascontiguousarray(np.zeros(n, np.float64), np.float64) + pred = np.ascontiguousarray(np.ones(n, np.uint32)) + # + # The J that all have the currently considered minimum + # + scan = np.ascontiguousarray(np.zeros(n, np.uint32), np.uint32) + # + # The J that have been reached by done J + # + to_do = np.ascontiguousarray(np.zeros(n, np.uint32), np.uint32) + # + # The J that have been processed + # + ready = np.ascontiguousarray(np.zeros(n, np.uint32), np.uint32) + # + # We need to scan all J for each I to find the ones in the to-do list + # so we need to maintain an array that keeps track of whether a J is + # on the to-do list. We do this by starting out at -1 and replacing + # the ones moved off of "to-do" with "i". Then we can check to see if + # done[j] == i to see if it is to-do or not + # + done = np.ascontiguousarray(-np.ones(n, np.uint32), np.uint32) + on_to_do = np.ascontiguousarray(-np.ones(n, np.uint32), np.uint32) + eps = np.finfo(np.float32).eps + cdef: + int last, low, up + int i, iii, i1, k + int n_i = ii.shape[0] + int n_j, n_j2 + int j, jjj, j1, jidx + int found + int n_to_do + int n_ready + int *p_i_base = (ii.data) + int *p_j_base = (jj.data) + int *p_j + int *p_j2 + int *p_idx_base = (idx.data) + int *p_count_base = (count.data) + int *p_x_base = (x.data) + int *p_y_base = (y.data) + double *p_u_base = (u.data) + double *p_v_base = (v.data) + double *p_c_base = (c.data) + double *p_c + double *p_d_base = (PyArray_DATA(d)) + int *p_pred_base = (PyArray_DATA(pred)) + int *p_to_do = (PyArray_DATA(to_do)) + int *p_scan = (PyArray_DATA(scan)) + int *p_ready = (PyArray_DATA(ready)) + int *p_done = (PyArray_DATA(done)) + int *p_on_to_do = (PyArray_DATA(on_to_do)) + double inf = np.sum(c) + 1 # This is larger than any path through. + double umin, temp, h, u1 + + ################################################## + # + # Augment procedure: from the Jonker paper. + # + # procedure AUGMENT; + # begin + # for all unassigned i* do + # begin + # for j:= 1 ... n do + # begin d[j] := c[i*,j] - v[j] ; pred[j] := i* end; + # READY: = { ) ; SCAN: = { } ; TODO: = { 1 ... n} ; + # repeat + # if SCAN = { } then + # begin + # u = min {d[j] for j in TODO} ; + # SCAN: = {j | d[j] = u} ; + # TODO: = TODO - SCAN; + # for j in SCAN do if y[j]==0 then go to augment + # end; + # select any j* in SCAN; + # i := y[j*]; SCAN := SCAN - {j*} ; READY: = READY + {j*} ; + # for all j in TODO do if u + c[i,j] < d[j] then + # begin + # d[j] := u + c[i,j]; pred[j] := i; + # if d[j] = u then + # if y[j] is unassigned then go to augment else + # begin SCAN: = SCAN + {j} ; TODO: = TODO - {j} end + # end + # until false; (* repeat always ends with go to augment *) + #augment: + # (* price updating *) + # for k in READY do v[k]: = v[k] + d[k] - u; + # (* augmentation *) + # repeat + # i: = pred[j]; y[ j ] := i ; k:=j; j:=x[i]; x[i]:= k + # until i = i* + # end + #end + with nogil: + for iii from 0 <= iii < n_i: + i = p_i_base[iii] + # + # Initialization + # + p_j = p_j_base + p_idx_base[i] + n_j = p_count_base[i] + p_c = p_c_base + p_idx_base[i] + # + # Initialize d to "inf" for j that are not in the sparse list + # + for jjj from 0 <= jjj < n: + p_d_base[jjj] = inf + for jjj from 0 <= jjj < n_j: + j = p_j[jjj] + p_d_base[j] = p_c[jjj] - p_v_base[j] + p_to_do[jjj] = j + p_on_to_do[j] = i + p_pred_base[j] = i + + n_to_do = n_j + low = 0 + up = 0 + n_ready = 0 + while True: + if up == low: + low = 0 + up = 0 + umin = inf + # + # Find the minimum d[j] among those to do + # At the same time, move the j at the current + # minimum to the range, low to up, and the remaining + # starting at up. + # + for jjj from 0 <= jjj < n_to_do: + j = p_to_do[jjj] + if p_done[j] == i: + continue + temp = p_d_base[j] + if temp <= umin: + if temp < umin: + up = 0 + umin = temp + p_scan[up] = j + up += 1 + j1 = n + for jjj from low <= jjj < up: + j = p_scan[jjj] + if p_y_base[j] == n: + # Augment if not assigned + j1 = j + break + p_done[j] = i + if j1 < n: + break + # + # get the first j to be scanned + # + # p_j2 points to the j at the i to be replaced + # n_j2 is the number of j in the sparse array for this i + # + j1 = p_scan[low] + low += 1 + p_ready[n_ready] = j1 + n_ready += 1 + i1 = p_y_base[j1] + p_j2 = p_j_base + p_idx_base[i1] + p_c = p_c_base + p_idx_base[i1] + n_j2 = p_count_base[i1] + jidx = bsearch(p_j2, n_j2, j1) + u1 = p_c[jidx] - p_v_base[j1] - umin + j1 = n + for jjj from 0 <= jjj < n_j2: + j = p_j2[jjj] + if p_done[j] != i: + h = p_c[jjj] - p_v_base[j] - u1 + if h < p_d_base[j]: + p_pred_base[j] = i1 + p_d_base[j] = h + if h <= umin: + if p_y_base[j] == n: + j1 = j + break + # + # This one is at the minimum so it can be + # added to the j currently being scanned + # + p_scan[up] = j + p_done[j] = i + up += 1 + elif p_on_to_do[j] != i: + # + # This one is reachable, so it can be added + # to the list of j to consider. + # + p_to_do[n_to_do] = j + p_on_to_do[j] = i + n_to_do += 1 + + if j1 != n: + break + + # Augment + for jjj from 0 <= jjj < n_ready: + j = p_ready[jjj] + temp = p_v_base[j] + p_v_base[j] += p_d_base[j] - umin + while True: + i1 = p_pred_base[j1] + p_y_base[j1] = i1 + j1, p_x_base[i1] = p_x_base[i1], j1 + if i1 == i: + break + # + # Re-establish slackness since we didn't pay attention to u + # + for i from 0 <= i < n: + j = x[i] + p_j = p_j_base + p_idx_base[i] + jidx = bsearch(p_j, p_count_base[i], j) + u[i] = c[p_idx_base[i] + jidx] - v[j] + +# +# A binary search function: +# +# ptr - array to search +# count - # of values in the array +# val - value to search for +# +# returns the position of the value relative to the pointer +# +cdef int bsearch(int *ptr, int count, int val) nogil: + cdef: + int low = 0 + int high = count-1 + int mid + while low <= high: + mid = (low + high) // 2 + if val == ptr[mid]: + return mid + if val > ptr[mid]: + low = mid + 1 + else: + high = mid - 1 diff --git a/spindle_tracker/tracker/lapjv/lapjv.py b/spindle_tracker/tracker/lapjv/lapjv.py new file mode 100644 index 0000000..d11aadc --- /dev/null +++ b/spindle_tracker/tracker/lapjv/lapjv.py @@ -0,0 +1,432 @@ + +# -*- coding: utf-8 -*- + + +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + + +''' lapjv.py - Jonker-Volgenant algorithm for linear assignment problem. + +This is an implementation of the Jonker-Volgenant algorithm for solving +the linear assignment problem. The code is derived from the paper, + +R. Jonker, A. Volgenant, "A Shortest Augmenting Path Algorithm for Dense +and Sparse Linear Assignment Problems", Computing 38, p 325-340, 1987 + +CellProfiler is distributed under the GNU General Public License, +but this file is licensed under the more permissive BSD license. +See the accompanying file LICENSE for details. + +Copyright (c) 2003-2009 Massachusetts Institute of Technology +Copyright (c) 2009-2013 Broad Institute +All rights reserved. +''' + +import numpy as np + +try: # pragma: no cover + from ._lapjv import reduction_transfer + from ._lapjv import augmenting_row_reduction + from ._lapjv import augment +except ImportError: + # Try on the fly Cython compilation + import pyximport + pyximport.install(setup_args={'include_dirs': [np.get_include()]}) + + from ._lapjv import reduction_transfer + from ._lapjv import augmenting_row_reduction + from ._lapjv import augment + +__all__ = [] + + +def lapjv(i, j, costs, wants_dual_variables=False, + augmenting_row_reductions=2, use_slow=False): # pragma: no cover + '''Sparse linear assignment solution using Jonker-Volgenant algorithm + + i,j - similarly-sized vectors that pair the object at index i[n] with + the object at index j[n] + + costs - a vector of similar size to i and j that is the cost of pairing + i[n] with j[n]. + + wants_dual_variables - the dual problem reduces the costs using two + vectors, u[i] and v[j] where the solution is the maximum value of + np.sum(u) + np.sum(v) where cost[i,j] - u[i] - v[j] >= 0. + Set wants_dual_variables to True to have u and v returned in + addition to the assignments. + + augmenting_row_reductions - the authors suggest that augmenting + row reduction be performed twice to optimize the u and v + before the augmenting stage. The caller can choose a + different number of reductions by supplying a different + value here. + use_slow: Bool, default False: + use the pure python implementation, useful for debugging purpose + + All costs not appearing in i,j are taken as infinite. Each i in the range, + 0 to max(i) must appear at least once and similarly for j. + + returns (x, y), the pairs of assignments that represent the solution + or (x, y, u, v) if the dual variables are requested. + ''' + i = np.atleast_1d(i).astype(int) + j = np.atleast_1d(j).astype(int) + costs = np.atleast_1d(costs) + + assert len(i) == len(j), "i and j must be the same length" + assert len(i) == len(costs), "costs must be the same length as i" + + # + # Find the number of i with non-infinite cost for each j + # + j_count = np.bincount(j) + assert not np.any(j_count == 0), "all j must be paired with at least one i" + # + # if you order anything by j, this is an index to the minimum for each j + # + j_index = np.hstack([[0], np.cumsum(j_count[:-1])]) + # + # Likewise for i + # + i_count = np.bincount(i) + assert not np.any(i_count == 0), "all i must be paired with at least one j" + i_index = np.hstack([[0], np.cumsum(i_count[:-1])]) + + n = len(j_count) # dimension of the square cost matrix + assert n == len(i_count), "There must be the same number of unique i and j" + + # # # # # # # # + # + # Variable initialization: + # + # The output variables: + # + # x - for each i, the assigned j. -1 indicates uninitialized + # y - for each j, the assigned i + # u, v - the dual variables + # + # A value of x = n or y = n means "unassigned" + # + x = np.ascontiguousarray(np.ones(n, np.uint32) * n) + y = np.ascontiguousarray(np.ones(n, np.uint32) * n, np.uint32) + u = np.ascontiguousarray(np.zeros(n, np.float64)) + + # # # # # # # # + # + # Column reduction + # + # # # # # # # # + # + # For a given j, find the i with the minimum cost. + # + order = np.lexsort((-i, costs, j)) + min_idx = order[j_index] + min_i = i[min_idx] + # + # v[j] is assigned to the minimum cost over all i + # + v = np.ascontiguousarray(costs[min_idx], np.float64) + # + # Find the last j for which i was min_i. + # + x[min_i] = np.arange(n).astype(np.uint32) + y[x[x != n]] = np.arange(n).astype(np.uint32)[x != n] + # + # Three cases for i: + # + # i is not the minimum of any j - i goes on free list + # i is the minimum of one j - v[j] remains the same and y[x[j]] = i + # i is the minimum of more than one j, perform reduction transfer + # + assignment_count = np.bincount(min_i[min_i != n]) + # print assignment_count + assignment_count = np.hstack( + (assignment_count, np.zeros(n - len(assignment_count), int))) + free_i = assignment_count == 0 + one_i = assignment_count == 1 + # print one_i + # order = np.lexsort((costs, i)) Replace with this after all is done + order = np.lexsort((j, i)) + j = np.ascontiguousarray(j[order], np.uint32) + costs = np.ascontiguousarray(costs[order], np.float64) + i_index = np.ascontiguousarray(i_index, np.uint32) + i_count = np.ascontiguousarray(i_count, np.uint32) + if use_slow: + if np.any(one_i): + print(np.ascontiguousarray(np.argwhere(one_i).flatten(), np.uint32)) + slow_reduction_transfer( + np.ascontiguousarray(np.argwhere(one_i).flatten(), np.uint32), + j, i_index, i_count, x, u, v, costs) + # + # Perform augmenting row reduction on unassigned i + # + ii = np.ascontiguousarray(np.argwhere(free_i).flatten(), np.uint32) + if len(ii) > 0: + for iii in range(augmenting_row_reductions): + ii = slow_augmenting_row_reduction( + n, ii, j, i_index, i_count, x, y, u, v, costs) + slow_augment(n, ii, j, i_index, i_count, x, y, u, v, costs) + else: + if np.any(one_i): + reduction_transfer( + np.ascontiguousarray(np.argwhere(one_i).flatten(), np.uint32), + j, i_index, i_count, x, u, v, costs) + # + # Perform augmenting row reduction on unassigned i + # + ii = np.ascontiguousarray(np.argwhere(free_i).flatten(), np.uint32) + if len(ii) > 0: + for iii in range(augmenting_row_reductions): + ii = augmenting_row_reduction(n, ii, j, i_index, i_count, x, y, u, v, costs) + augment(n, ii, + j, i_index, i_count, x, y, u, v, costs) + if wants_dual_variables: + return x, y, u, v + else: + return x, y + + +def slow_reduction_transfer(ii, j, idx, count, x, u, v, c): # pragma: no cover + '''Perform the reduction transfer step from the Jonker-Volgenant algorithm + + The data is input in a ragged array in terms of "i" structured as a + vector of values for each i,j combination where: + + ii - the i to be reduced + j - the j-index of every entry + idx - the index of the first entry for each i + count - the number of entries for each i + x - the assignment of j to i + u - the dual variable "u" which will be updated. It should be + initialized to zero for the first reduction transfer. + v - the dual variable "v" which will be reduced in-place + c - the cost for each entry. + + The code described in the paper is: + + for each assigned row i do + begin + j1:=x[i]; u=min {c[i,j]-v[j] | j=1..n, j != j1}; + v[j1]:=v[j1]-(u-u[i]); + u[i] = u; + end; + + The authors note that reduction transfer can be applied in later stages + of the algorithm but does not seem to provide a substantial benefit + in speed. + ''' + for i in ii: + j1 = x[i] + jj = j[idx[i]:(idx[i]+count[i])] + try: + uu = np.min((c[idx[i]:(idx[i]+count[i])] - v[jj])[jj != j1]) + except ValueError: + print(c[idx[i]:(idx[i] + count[i])] - v[jj]) + print(idx[i], idx[i] + count[i], jj, j1) + continue # raise + v[j1] = v[j1] - uu + u[i] + u[i] = uu + + +def slow_augmenting_row_reduction(n, ii, jj, idx, count, x, y, u, v, c): # pragma: no cover + '''Perform the augmenting row reduction step + from the Jonker-Volgenaut algorithm + + n - the number of i and j in the linear assignment problem + ii - the unassigned i + jj - the j-index of every entry in c + idx - the index of the first entry for each i + count - the number of entries for each i + x - the assignment of j to i + y - the assignment of i to j + u - the dual variable "u" which will be updated. It should be + initialized to zero for the first reduction transfer. + v - the dual variable "v" which will be reduced in-place + c - the cost for each entry. + + returns the new unassigned i + ''' + + ####################################### + # + # From Jonker: + # + # procedure AUGMENTING ROW REDUCTION; + # begin + # LIST: = {all unassigned rows}; + # for all i in LIST do + # repeat + # ul:=min {c[i,j]-v[j] for j=l ...n}; + # select j1 with c [i,j1] - v[j1] = u1; + # u2:=min {c[i,j]-v[j] for j=l ...n,j< >jl} ; + # select j2 with c [i,j2] - v [j2] = u2 and j2 < >j 1 ; + # u[i]:=u2; + # if u1 0 then x [k]:=0; x[i]:=jl; y [ j l ] : = i ; i:=k + # until ul =u2 (* no reduction transfer *) or k=0 i~* augmentation *) + # end + ii = list(ii) + k = 0 + limit = len(ii) + free = [] + while k < limit: + i = ii[k] + k += 1 + j = jj[idx[i]:(idx[i] + count[i])] + uu = c[idx[i]:(idx[i] + count[i])] - v[j] + order = np.lexsort([uu]) + u1, u2 = uu[order[:2]] + j1, j2 = j[order[:2]] + i1 = y[j1] + if u1 < u2: + v[j1] = v[j1] - u2 + u1 + elif i1 != n: + j1 = j2 + i1 = y[j1] + if i1 != n: + if u1 < u2: + k -= 1 + ii[k] = i1 + else: + free.append(i1) + x[i] = j1 + y[j1] = i + return np.array(free, np.uint32) + + +def slow_augment(n, ii, jj, idx, count, x, y, u, v, c): # pragma: no cover + '''Perform the augmentation step to assign unassigned i and j + + n - the # of i and j, also the marker of unassigned x and y + ii - the unassigned i + jj - the ragged arrays of j for each i + idx - the index of the first j for each i + count - the number of j for each i + x - the assignments of j for each i + y - the assignments of i for each j + u,v - the dual variables + c - the costs + ''' + + ################################################## + # + # Augment procedure: from the Jonker paper. + # + # Note: + # cred [i,j] = c [i,j] - u [i] - v[j] + # + # procedure AUGMENT; + # begin + # for all unassigned i* do + # begin + # for j:= 1 ... n do + # begin d[j] := c[i*,j] - v[j] ; pred[j] := i* end; + # READY: = { ) ; SCAN: = { } ; TODO: = { 1 ... n} ; + # repeat + # if SCAN = { } then + # begin + # u = min {d[j] for j in TODO} ; + # SCAN: = {j | d[j] = u} ; + # TODO: = TODO - SCAN; + # for j in SCAN do if y[j]==0 then go to augment + # end; + # select any j* in SCAN; + # i := y[j*]; SCAN := SCAN - {j*} ; READY: = READY + {j*} ; + # for all j in TODO do if u + cred[i,j] < d[j] then + # begin + # d[j] := u + cred[i,j]; pred[j] := i; + # if d[j] = u then + # if y[j] is unassigned then go to augment else + # begin SCAN: = SCAN + {j} ; TODO: = TODO - {j} end + # end + # until false; (* repeat always ends with go to augment *) + #augment: + # (* price updating *) + # for k in READY do v[k]: = v[k] + d[k] - u; + # (* augmentation *) + # repeat + # i: = pred[j]; y[ j ] := i ; k:=j; j:=x[i]; x[i]:= k + # until i = i* + # end + #end + inf = np.sum(c) + 1 + d = np.zeros(n) + cc = np.zeros((n, n)) + cc[:, :] = inf + for i in range(n): + cc[i, jj[idx[i]:(idx[i]+count[i])]] = c[idx[i]:(idx[i] + count[i])] + c = cc + for i in ii: + print("Processing i=%d" % i) + j = jj[idx[i]:(idx[i] + count[i])] + d = c[i, :] - v + pred = np.ones(n, int) * i + on_deck = [] + ready = [] + scan = [] + to_do = list(range(n)) + try: + while True: + print("Evaluating i=%d, n_scan = %d" % (i, len(scan))) + if len(scan) == 0: + ready += on_deck + on_deck = [] + umin = np.min([d[jjj] for jjj in to_do]) + print("umin = %f" % umin) + scan = [jjj for jjj in to_do if d[jjj] == umin] + to_do = [jjj for jjj in to_do if d[jjj] != umin] + for j1 in scan: + if y[j1] == n: + raise StopIteration() + j1 = scan[0] + iii = y[j1] + print("Consider replacing i=%d, j=%d" % (iii, j1)) + scan = scan[1:] + on_deck += [j1] + u1 = c[iii, j1] - v[j1] - umin + for j1 in list(to_do): + h = c[iii, j1] - v[j1] - u1 + print("Consider j=%d as replacement, c[%d,%d]=%f,v[%d]=%f,h=%f, d[j]= %f" % + (j1, iii, j1, c[iii, j1], j1, v[j1], h, d[j1])) + if h < d[j1]: + print("Add to chain") + pred[j1] = iii + if h == umin: + if y[j1] == n: + raise StopIteration() + print("Add to scan") + scan += [j1] + to_do.remove(j1) + d[j1] = h + + except StopIteration: + # Augment + print("Augmenting %d" % j1) + for k in ready: + temp = v[k] + v[k] = v[k] + d[k] - umin + print("v[%d] %f -> %f" % (k, temp, v[k])) + while True: + iii = pred[j1] + print("y[%d] %d -> %d" % (j1, y[j1], iii)) + y[j1] = iii + j1, x[iii] = x[iii], j1 + if iii == i: + break + # + # Re-establish slackness since we didn't pay attention to u + # + for i in range(n): + j = x[i] + u[i] = c[i, j] - v[j] + +# if __name__=="__main__": +# i = np.load("c:/temp/bad/img-1557/i.npy") +# j = np.load("c:/temp/bad/img-1557/j.npy") +# costs = np.load("c:/temp/bad/img-1557/c.npy") +# lapjv(i, j, costs) diff --git a/spindle_tracker/tracker/matrix/__init__.py b/spindle_tracker/tracker/matrix/__init__.py new file mode 100644 index 0000000..7bd40ab --- /dev/null +++ b/spindle_tracker/tracker/matrix/__init__.py @@ -0,0 +1,13 @@ + +# -*- coding: utf-8 -*- + + +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + + +from .matrix import CostMatrix + +__all__ = ["CostMatrix"] diff --git a/spindle_tracker/tracker/matrix/matrix.py b/spindle_tracker/tracker/matrix/matrix.py new file mode 100644 index 0000000..b1bcc94 --- /dev/null +++ b/spindle_tracker/tracker/matrix/matrix.py @@ -0,0 +1,220 @@ +# -*- coding: utf-8 -*- + + +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + + +import logging +import numpy as np + +from ..lapjv import lapjv + +log = logging.getLogger(__name__) + +__all__ = [] + + +class CostMatrix(object): + """This class represents the cost matrix which will be given to LAPJV solver. + Cost matrix is built from matrices blocks. + + Parameters + ---------- + blocks : 2D list of :class:`numpy.ndarray` or None + Each array value is a block or None (filled with np.nan). + """ + + def __init__(self, blocks): + """ + """ + + if isinstance(blocks, list): + self.blocks = np.atleast_2d(blocks) + else: + self.blocks = blocks + + self._concatenate_blocks() + self._fill_lrb() + + self.in_links = None + self.out_links = None + self.assigned_costs = None + + def solve(self): + """Solves the linear assignement problem on `self.mat`. + """ + + idxs_in, idxs_out, self.costs = self.get_flat() + self.in_links, self.out_links = lapjv(idxs_in, idxs_out, self.costs) + + def get_masked(self): + """Get masked array. + + Returns + ------- + mat : `numpy.ndarray` + A masked array on `numpy.nan` of the cost matrix. + """ + return np.ma.masked_invalid(self.mat) + + def get_flat(self): + """Get flat vectors according to the cost matrix. + + Returns + ------- + idxs_in : 1D `numpy.ndarray` + Y axis indexes. + idxs_out : 1D `numpy.ndarray` + X axis indexes. + costs : 1D `numpy.ndarray` + Associated costs (matrix value). + """ + masked = self.get_masked() + costs = masked.compressed() + idxs_in, idxs_out = np.where( + np.logical_not(np.ma.getmask(masked))) + return idxs_in, idxs_out, costs + + def _fill_lrb(self): + """Fill the lower contiguous block of NaN values with the transposed + matrix of the related upper block. + """ + + # Find the lower contiguous block + x, y = self.get_shapes() + i = np.sum(x[:len(x) / 2]) + j = np.sum(y[:len(y) / 2]) + + # Copy the upper left block and transpose + lrb = self.mat[:i, :j].T.copy() + + # Give a value higher than the max value + lrb[np.isfinite(lrb)] = self.get_masked().max() * 1.1 + + self.mat[i:, j:] = lrb + + def _concatenate_blocks(self): + """Concatenate a matrix of block to a single matrix : the cost matrix. + """ + + row_shapes, col_shapes = self.get_shapes() + + nrows = row_shapes.sum() + ncols = col_shapes.sum() + self.mat = np.empty((nrows, ncols)) + self.mat.fill(np.nan) + + row_corners = row_shapes.cumsum() - row_shapes + col_corners = col_shapes.cumsum() - col_shapes + + for i, (start_i, shape_i) in enumerate(zip(row_corners, row_shapes)): + for j, (start_j, shape_j) in enumerate(zip(col_corners, col_shapes)): + self.mat[start_i:start_i+shape_i, + start_j:start_j+shape_j] = self.blocks[i, j] + + def get_shapes(self): + """Get whole matrix blocks shape. + + Returns + ------- + row_shapes : 1D :class:`numpy.ndarray` + Row shapes. + col_shapes : 1D :class:`numpy.ndarray` + Column shapes. + """ + row_shapes = np.zeros(self.blocks.shape[0]) + col_shapes = np.zeros(self.blocks.shape[1]) + + for n, row in enumerate(self.blocks): + shapes = [] + for block in row: + if isinstance(block, np.ndarray): + shapes.append(block.shape[0]) + if np.unique(shapes).size != 1: + raise ValueError("Blocks don't fit horizontally") + row_shapes[n] = shapes[0] + + for n, col in enumerate(self.blocks.T): + shapes = [] + for block in col: + if isinstance(block, np.ndarray): + shapes.append(block.shape[1]) + if np.unique(shapes).size != 1: + raise ValueError("Blocks don't fit vertically") + col_shapes[n] = shapes[0] + + return row_shapes.astype(np.int), col_shapes.astype(np.int) + + def view(self, ax=None, colormap="gray", **kwargs): # pragma: no cover + """Display cost matrice on a plot. + + Parameters + ---------- + colormap : string + Matplotlib colormap to use. See + (http://wiki.scipy.org/Cookbook/Matplotlib/Show_colormaps). + """ + + import matplotlib.pyplot as plt + + if ax: + fig = ax.get_figure() + else: + fig, ax = plt.subplots() + + rec_shape = np.array(self.mat.shape) + size = rec_shape[0] + row_shapes, col_shapes = self.get_shapes() + + # Show matrix + cax = ax.imshow(self.mat, interpolation='none', cmap=colormap, + extent=[0, size, 0, size], **kwargs) + fig.colorbar(cax) + + ax.grid(False) + + # Minor axis + for i in range(1, size): + ax.axvline(x=i, ymin=0, ymax=size, linewidth=1, color='black', + alpha=0.7) + ax.axhline(y=i, xmin=0, xmax=size, linewidth=1, color='black', + alpha=0.7) + + # Add labels + ax.set_xticks(np.arange(0.5, size + 0.5)) + ax.set_yticks(np.arange(0.5, size + 0.5)) + + col_labels = np.hstack(np.array([list(range(s)) + for s in col_shapes.astype(np.int)])) + row_labels = np.hstack(np.array([list(reversed(range(s))) + for s in row_shapes[::-1].astype(np.int)])) + + ax.set_xticklabels(col_labels) + ax.set_yticklabels(row_labels) + + ax.xaxis.tick_top() + + # Add major axis + for col_id, row_id in zip(np.cumsum(col_shapes), np.cumsum(row_shapes)): + ax.axvline(x=col_id, ymin=0, ymax=size, linewidth=3, color='black') + ax.axhline(y=size - row_id, xmin=0, xmax=size, linewidth=3, color='black') + + # Display nan value + for p in np.argwhere(np.isnan(self.mat)): + x = p[1] + 0.5 + y = size - 1 - p[0] + 0.5 + ax.scatter(x, y, marker='x', s=500, color='red', alpha=0.3) + + # Show LAPJV solutions + if self.out_links is not None: + for idx_out, idx_in in enumerate(self.out_links): + ax.scatter(idx_out + 0.5, size - 1 - idx_in + 0.5, marker='o', + s=500, color='green', alpha=0.4) + + ax.set_xlim(0, size) + ax.set_ylim(0, size) + + return ax diff --git a/spindle_tracker/tracker/solver/__init__.py b/spindle_tracker/tracker/solver/__init__.py new file mode 100644 index 0000000..f234424 --- /dev/null +++ b/spindle_tracker/tracker/solver/__init__.py @@ -0,0 +1,15 @@ + +# -*- coding: utf-8 -*- + + +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + + +from .solver import AbstractSolver +from .by_frame_solver import ByFrameSolver +from .gap_close_solver import GapCloseSolver + +__all__ = ["AbstractSolver", "ByFrameSolver", "GapCloseSolver"] diff --git a/spindle_tracker/tracker/solver/by_frame_solver.py b/spindle_tracker/tracker/solver/by_frame_solver.py new file mode 100644 index 0000000..1cfaede --- /dev/null +++ b/spindle_tracker/tracker/solver/by_frame_solver.py @@ -0,0 +1,270 @@ +# -*- coding: utf-8 -*- + +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + +import logging +log = logging.getLogger(__name__) + +import numpy as np + +from ...utils import print_progress + +from ..matrix import CostMatrix +from ..cost_function import AbstractCostFunction +from ..cost_function.brownian import BrownianLinkCostFunction +from ..cost_function.diagonal import DiagonalCostFunction +from ..cost_function.directed import BasicDirectedLinkCostFunction + +from . import AbstractSolver + +__all__ = [] + + +class ByFrameSolver(AbstractSolver): + """ + + Parameters + ---------- + trajs : :class:`pandas.DataFrame` + cost_functions : list of list + """ + def __init__(self, trajs, cost_functions, coords=['x', 'y', 'z']): + + super(self.__class__, self).__init__(trajs) + + self.t_in = 0 + self.t_out = 0 + + self.coords = coords + + self.trajs.check_trajs_df_structure(index=['t_stamp', 'label'], + columns=['t'] + coords) + + self.link_cf = cost_functions['link'] + self.check_cost_function_type(self.link_cf, AbstractCostFunction) + + self.birth_cf = cost_functions['birth'] + self.check_cost_function_type(self.birth_cf, AbstractCostFunction) + + self.death_cf = cost_functions['death'] + self.check_cost_function_type(self.death_cf, AbstractCostFunction) + + self.max_assigned_cost = self.death_cf.context['cost'] + + @classmethod + def for_brownian_motion(cls, trajs, + max_speed, + penalty=1.05, + coords=['x', 'y', 'z']): + """ + + Parameters + ---------- + trajs : :class:`spindle_tracker.trajectories.Trajectories` + max_speed : float + Maximum objects velocity + penalty : float + coords : list + Which columns to choose in trajs when computing distances. + + Examples + -------- + >>> true_trajs = Trajectories(data.brownian_trajectories_generator()) + >>> + >>> # Remove labels + >>> true_trajs.relabel(np.arange(len(true_trajs))) + >>> + >>> solver = ByFrameSolver.for_brownian_motion(true_trajs, max_speed=5, penalty=2.) + >>> new_trajs = solver.track() + 2014:INFO:by_frame_solver: Initiating frame by frame tracking. + 2014:INFO:by_frame_solver: Frame by frame tracking done. 5 segments found (500 before). + """ + guessed_cost = np.float(max_speed ** 2) * penalty + diag_context = {'cost': guessed_cost} + diag_params = {'penalty': penalty, 'coords': coords} + + link_cost_func = BrownianLinkCostFunction(parameters={'max_speed': max_speed, + 'coords': coords}) + birth_cost_func = DiagonalCostFunction(context=diag_context, + parameters=diag_params) + death_cost_func = DiagonalCostFunction(context=diag_context, + parameters=diag_params) + + cost_functions = {'link': link_cost_func, + 'birth': birth_cost_func, + 'death': death_cost_func} + + return cls(trajs, cost_functions, coords=coords) + + @classmethod + def for_directed_motion(cls, trajs, + max_speed, + penalty=1.05, + past_traj_time=10, + smooth_factor=0, + interpolation_order=1, + coords=['x', 'y', 'z']): + """Link objects according to their distance found in trajectories frame by frame. + + Parameters + ---------- + trajs : :class:`spindle_tracker.trajectories.Trajectories` + max_speed : float + Maximum objects velocity + penalty : float + past_traj_time : float + Time during which the tracker can make a gap close. Above this time all gap + close event will discarded. + smooth_factor : float + Smoothing condition used in :func:`scipy.interpolate.splrep` + interpolation_order : int + The order of the spline fit. See :func:`scipy.interpolate.splrep` + coords : list + Which columns to choose in trajs when computing distances. + """ + + parameters = {'max_speed': max_speed, + 'past_traj_time': past_traj_time, + 'smooth_factor': smooth_factor, + 'interpolation_order': interpolation_order, + 'coords': coords} + + guessed_cost = 20 * penalty + diag_context = {'cost': guessed_cost} + diag_params = {'penalty': penalty} + link_context = {'trajs': trajs} + + link_cost_func = BasicDirectedLinkCostFunction(parameters=parameters, + context=link_context) + + birth_cost_func = DiagonalCostFunction(context=diag_context, + parameters=diag_params) + death_cost_func = DiagonalCostFunction(context=diag_context, + parameters=diag_params) + + cost_functions = {'link': link_cost_func, + 'birth': birth_cost_func, + 'death': death_cost_func} + + return cls(trajs, cost_functions, coords=coords) + + @property + def blocks_structure(self): + return [[self.link_cf.mat, self.death_cf.mat], + [self.birth_cf.mat, None]] + + @property + def pos_in(self): + return self.trajs.loc[self.t_in] + + @property + def pos_out(self): + return self.trajs.loc[self.t_out] + + def track(self, progress_bar=False, progress_bar_out=None): + """ + + Returns + ------- + self.trajs : :class:`pandas.DataFrame` + progress_bar : bool + Display progress bar + progress_bar_out : OutStream + For testing purpose only + """ + + log.info('Initiating frame by frame tracking.') + + old_labels = self.trajs.index.get_level_values('label').values + self.trajs['new_label'] = old_labels.astype(np.float) + ts_in = self.trajs.t_stamps[:-1] + ts_out = self.trajs.t_stamps[1:] + + n_labels_before = len(self.trajs.labels) + + n = len(ts_in) + for i, (t_in, t_out) in enumerate(zip(ts_in, ts_out)): + if progress_bar: + progress = i / n * 100 + message = "t_in : {} | t_out {}".format(t_in, t_out) + print_progress(progress, message=message, out=progress_bar_out) + + self.one_frame(t_in, t_out) + + if progress_bar: + print_progress(-1) + + self.relabel_trajs() + + n_labels_after = len(self.trajs.labels) + mess = 'Frame by frame tracking done. {} segments found ({} before).' + log.info(mess.format(n_labels_after, n_labels_before)) + return self.trajs + + def one_frame(self, t_in, t_out): + """ + + Parameters + ---------- + t_in : int + t_out : int + """ + + self.t_in = t_in + self.t_out = t_out + + pos_in = self.pos_in + pos_out = self.pos_out + + self.link_cf.context['pos_in'] = pos_in + self.link_cf.context['pos_out'] = pos_out + self.link_cf.get_block() + + self.birth_cf.context['objects'] = pos_out + self.birth_cf.get_block() + + self.death_cf.context['objects'] = pos_in + self.death_cf.get_block() + + self.cm = CostMatrix(self.blocks_structure) + self.cm.solve() + self.assign() + + def assign(self): + """ + """ + + row_shapes, col_shapes = self.cm.get_shapes() + last_in_link = row_shapes[0] + last_out_link = col_shapes[0] + + new_labels_in = self.trajs.loc[self.t_in]['new_label'].values + new_labels_out = np.arange(last_out_link) + + for idx_out, idx_in in enumerate(self.cm.out_links[:last_out_link]): + if idx_in >= last_in_link: + # new segment + new_label = self.trajs['new_label'].max() + 1. + else: + # assignment + new_label = new_labels_in[idx_in] + self._update_max_assign_cost(self.cm.mat[idx_in, idx_out]) + + new_labels_out[idx_out] = new_label + self.trajs.loc[self.t_out, 'new_label'] = new_labels_out + # The line below looks much slower than the two lines above + # self.trajs.loc[self.t_out, 'new_label'].iloc[idx_out] = new_label + + def _update_max_assign_cost(self, cost): + """ + """ + + if cost > self.max_assigned_cost: + self.max_assigned_cost = cost + new_b_cost = self.max_assigned_cost * self.birth_cf.parameters['penalty'] + new_d_cost = self.max_assigned_cost * self.death_cf.parameters['penalty'] + self.birth_cf.context['cost'] = new_b_cost + self.death_cf.context['cost'] = new_d_cost diff --git a/spindle_tracker/tracker/solver/gap_close_solver.py b/spindle_tracker/tracker/solver/gap_close_solver.py new file mode 100644 index 0000000..ab677b3 --- /dev/null +++ b/spindle_tracker/tracker/solver/gap_close_solver.py @@ -0,0 +1,265 @@ +# -*- coding: utf-8 -*- + +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + +import numpy as np +import logging + +log = logging.getLogger(__name__) + +from ..matrix import CostMatrix + +from ..cost_function import AbstractCostFunction +from ..cost_function.brownian import BrownianGapCloseCostFunction +from ..cost_function.diagonal import DiagonalCostFunction + +from . import AbstractSolver + +__all__ = [] + + +class GapCloseSolver(AbstractSolver): + """ + + Parameters + ---------- + trajs : :class:`pandas.DataFrame` + cost_functions : list of list + """ + def __init__(self, + trajs, + cost_functions, + maximum_gap, + use_t_stamp=True, + coords=['x', 'y', 'z']): + + super(self.__class__, self).__init__(trajs) + + log.info('Initiating gap close solver') + self.coords = coords + + self.trajs.check_trajs_df_structure(index=['t_stamp', 'label'], + columns=['t'] + coords) + + self.link_cf = cost_functions['link'] + self.check_cost_function_type(self.link_cf, AbstractCostFunction) + + self.birth_cf = cost_functions['birth'] + self.check_cost_function_type(self.birth_cf, AbstractCostFunction) + + self.death_cf = cost_functions['death'] + self.check_cost_function_type(self.death_cf, AbstractCostFunction) + + self.maximum_gap = maximum_gap + self.use_t_stamp = use_t_stamp + + @classmethod + def for_brownian_motion(cls, trajs, + max_speed, + maximum_gap, + link_percentile=90, + use_t_stamp=True, + coords=['x', 'y', 'z']): + """Close gaps found in different trajectories. + + Parameters + ---------- + trajs : :class:`spindle_tracker.trajectories.Trajectories` + Trajectories where to close gaps. + max_speed : float + Maximum speed of objects in trajectories. + maximum_gap : float + Maximum gap size which can be closed. + link_percentile : float + TODO + use_t_stamp : bool + If True `t_stamp` index will be used when computing maximum gap. If False, column 't' + will be used. + coords : list + Which columns to choose in trajs when computing distances. + + Examples + -------- + >>> trajs = data.with_gaps_df() + >>> max_speed = 10. + >>> maximum_gap = 5 + >>> gc_solver = GapCloseSolver.for_brownian_motion(trajs, max_speed=max_speed, + >>> maximum_gap=maximum_gap, + >>> use_t_stamp=True) + >>> new_trajs = gc_solver.track() + 2014-08-11:INFO:spindle_tracker.tracker.solver.gap_close_solver: Initiating gap close solver + 2014-08-11:INFO:spindle_tracker.tracker.solver.gap_close_solver: Find candidates among 7 segments + 2014-08-11:INFO:spindle_tracker.tracker.solver.gap_close_solver: 6 candidates found + 2014-08-11:INFO:spindle_tracker.tracker.solver.gap_close_solver: Build cost functions + 2014-08-11:INFO:spindle_tracker.tracker.solver.gap_close_solver: Assigning results + 2014-08-11:INFO:spindle_tracker.tracker.solver.gap_close_solver: 4 gap close event processed + """ + + guessed_cost = float(max_speed ** 2) + + diag_context = {'cost': guessed_cost} + diag_params = {'link_percentile': link_percentile, 'coords': coords} + + link_cost_func = BrownianGapCloseCostFunction(parameters={'max_speed': max_speed, + 'coords': coords}) + birth_cost_func = DiagonalCostFunction(context=diag_context, + parameters=diag_params) + death_cost_func = DiagonalCostFunction(context=diag_context, + parameters=diag_params) + + cost_functions = {'link': link_cost_func, + 'birth': birth_cost_func, + 'death': death_cost_func} + + return cls(trajs, cost_functions, maximum_gap, use_t_stamp=use_t_stamp, coords=coords) + + @property + def blocks_structure(self): + return [[self.link_cf.mat, self.death_cf.mat], + [self.birth_cf.mat, None]] + + def track(self, progress_bar=False, progress_bar_out=None): + """For details about link_percentile, see below from + K. Jaqaman and G. Danuser, Nature Methods, 2008. + + TFA: For track segment ends and starts, the alternative cost (b and d in Fig. 1c) had to + be comparable in magnitude to the costs of potential assignments, making the rejection + of gap closing, merging and splitting an accessible alternative. At the same time, the + alternative cost had to be at the higher end of the range of potential assignment costs, + so that the algorithm did not fail to close gaps and capture merge and split events. We + performed empirical tests of the sensitivity of tracking results to variations in the + alternative cost. We found that in a range 80th – 100th percentile of all potential + assignment costs the outcome of gap closing, merging and splitting varied negligibly + (data not shown). We attribute this robustness to the fact that track initiations and + terminations competed globally, in space and time, with all other potential assignments. + Thus, the alternative cost was taken as the 90th percentile. + """ + + idxs_in, idxs_out = self._get_candidates() + + if len(idxs_in) > 10000: + log.warning("Number of segment's candidates is very high." + " Tracking can be very slow.") + + self.link_cf.context['trajs'] = self.trajs + self.link_cf.context['idxs_in'] = idxs_in + self.link_cf.context['idxs_out'] = idxs_out + self.birth_cf.context['objects'] = self.trajs.labels + self.death_cf.context['objects'] = self.trajs.labels + + if not len(idxs_in): + log.info('No gap needs closing here') + return self.trajs + + old_labels = self.trajs.index.get_level_values('label').values + self.trajs.loc[:, 'new_label'] = old_labels.astype(np.float) + + log.info('Build cost functions') + + link_percentile_b = self.birth_cf.parameters['link_percentile'] + link_percentile_d = self.death_cf.parameters['link_percentile'] + self.link_cf.get_block() + link_costs = np.ma.masked_invalid(self.link_cf.mat).compressed() + + if not link_costs.shape[0]: + log.info('No suitable gap to fill') + return self.trajs + + cost_b = np.percentile(link_costs, link_percentile_b) + cost_d = np.percentile(link_costs, link_percentile_d) + self.birth_cf.context['cost'] = cost_b + self.birth_cf.get_block() + self.death_cf.context['cost'] = cost_d + self.death_cf.get_block() + + self.cm = CostMatrix(self.blocks_structure) + self.cm.solve() + self.assign() + + return self.trajs + + def _get_candidates(self): + """Find candidate pair of segments for gap closing. + """ + seg_idx = self.trajs.segment_idxs + + log.info('Find candidates among {} segments'.format(len(seg_idx))) + + max_gap = self.maximum_gap + labels = self.trajs.labels + + if self.use_t_stamp: + bounds = self.trajs.get_bounds() + else: + bounds = self.trajs.get_bounds(column='t') + + bounds = np.array(list(bounds.values())) + + start_times = bounds[:, 0] + stop_times = bounds[:, 1] + ss_in, ss_out = np.meshgrid(labels, labels) + + gaps_size = start_times[ss_out] - stop_times[ss_in] + + matches = np.argwhere((gaps_size > 0) * (gaps_size <= max_gap)) + + if not matches.shape[0]: + log.info("No candidate found") + return [], [] + + matches_in = matches[:, 1] + matches_out = matches[:, 0] + + if not self.use_t_stamp: + start_times = self.trajs.get_t_stamps_correspondences(start_times, column='t') + stop_times = self.trajs.get_t_stamps_correspondences(stop_times, column='t') + + in_idxs = np.column_stack([stop_times, self.trajs.labels]) + in_idxs = in_idxs[matches_in] + out_idxs = np.column_stack([start_times, self.trajs.labels]) + out_idxs = out_idxs[matches_out] + + # Convert idx in list of tuple + # Otherwise trajs.loc[] indexing fails. + # See https://github.com/pydata/pandas/issues/7981 + in_idxs = [tuple(v) for v in in_idxs] + out_idxs = [tuple(v) for v in out_idxs] + + log.info("{} candidates found".format(len(in_idxs))) + + return in_idxs, out_idxs + + def assign(self): + """ + """ + log.info('Assigning results') + + row_shapes, col_shapes = self.cm.get_shapes() + old_labels = self.trajs.index.get_level_values(level='label').values + new_labels = old_labels.copy() + unique_old = self.trajs.labels.copy() # np.unique(old_labels) + unique_new = self.trajs.labels.copy() # np.unique(new_labels) + + last_in_link = row_shapes[0] + last_out_link = col_shapes[0] + + n = 0 + for idx_out, idx_in in enumerate(self.cm.out_links[:last_out_link]): + if idx_in >= last_in_link: + # no merge + unique_new[idx_out] = unique_new.max() + 1 + else: + # do merge + new_label = unique_new[idx_in] + unique_new[idx_out] = new_label + n += 1 + + for old, new in zip(unique_old, unique_new): + new_labels[old_labels == old] = new + + log.info("{} gap close event processed".format(n)) + self.relabel_trajs(new_labels) + return self.trajs diff --git a/spindle_tracker/tracker/solver/solver.py b/spindle_tracker/tracker/solver/solver.py new file mode 100644 index 0000000..71293f9 --- /dev/null +++ b/spindle_tracker/tracker/solver/solver.py @@ -0,0 +1,58 @@ + +# -*- coding: utf-8 -*- + + +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + + +from ...trajectories import Trajectories + +__all__ = [] + + +class AbstractSolver(object): + """ + + Parameters + ---------- + trajs : :class:`spindle_tracker.trajectories.Trajectories` + The trajectories + """ + + def __init__(self, trajs): + self.trajs = Trajectories(trajs) + + def check_cost_function_type(self, obj, cost_funtion_type): + """Check wether an object inherit from another one. + + Parameters + ---------- + obj : object + cost_funtion_type : class name + + Raises + ------ + TypeError : `obj` type does not inherit from `cost_funtion_type` + """ + + error_mess = '''The cost function {} doesn't inherit from {}''' + + if not isinstance(obj, cost_funtion_type): + raise TypeError(error_mess.format(obj, cost_funtion_type.__name__)) + + def relabel_trajs(self, new_labels=None): + """ + Sets the trajectory index `label` to new values. + + Parameters + ---------- + new_labels: :class:`numpy.ndarray` or None, default None + The new label. If it is not provided, the function wil look for + will look for a column named "new_label" in `trajs` and use this + as the new label index + + """ + self.trajs.relabel(new_labels=new_labels) diff --git a/spindle_tracker/tracker/utils/__init__.py b/spindle_tracker/tracker/utils/__init__.py new file mode 100644 index 0000000..bdbabaa --- /dev/null +++ b/spindle_tracker/tracker/utils/__init__.py @@ -0,0 +1,13 @@ + +# -*- coding: utf-8 -*- + + +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + + +from . scores import get_scores_on_trajectories + +__all__ = ["get_scores_on_trajectories"] diff --git a/spindle_tracker/tracker/utils/scores.py b/spindle_tracker/tracker/utils/scores.py new file mode 100644 index 0000000..7bf15c0 --- /dev/null +++ b/spindle_tracker/tracker/utils/scores.py @@ -0,0 +1,59 @@ + +# -*- coding: utf-8 -*- + + +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + + +import numpy as np + +__all__ = [] + + +def get_scores_on_trajectories(trajs, coords=['x', 'y', 'z']): + """ + + Parameters + ---------- + trajs : :class:`pandas.DataFrame` + :class:`pandas.MultiIndex` need to contain 't_stamp' and 'label' and columns need to have + at least 'true_label' + coords : list + Features on which process scoring. + + Returns + ------- + min_chi_square : float + conserved_trajectories_number : float + Between 0 and 1. + scores : :class:`numpy.ndarray` + Chi square matrix. + """ + + gp_new = trajs.groupby(level='label').groups + gp_true = trajs.groupby('true_label').groups + + scores = np.empty((len(gp_new), len(gp_true))) + + for new_label, new_id in gp_new.items(): + for true_label, true_id in gp_true.items(): + + p = trajs.loc[new_id] + h = trajs.loc[true_id] + + p = p.reset_index(level='label') + h = h.reset_index(level='label') + h = h.reindex_like(p) + + h = h.dropna() + + score = ((h - p)[['x', 'y', 'z']].sum(axis=1) ** 2).mean() + scores[np.int(new_label), np.int(true_label)] = score + + min_chi_square = np.min(scores, axis=1).sum() + conserved_trajectories_number = scores.shape[1] / scores.shape[0] + + return min_chi_square, conserved_trajectories_number, scores diff --git a/spindle_tracker/tracking/begin_mitosis_tracker.py b/spindle_tracker/tracking/begin_mitosis_tracker.py index 04abf32..f365bf5 100644 --- a/spindle_tracker/tracking/begin_mitosis_tracker.py +++ b/spindle_tracker/tracking/begin_mitosis_tracker.py @@ -1,19 +1,19 @@ import gc import logging -log = logging.getLogger(__name__) - import numpy as np import pandas as pd import scipy from skimage import measure -from sktracker.trajectories import Trajectories -from sktracker.tracker.solver import ByFrameSolver -from sktracker.io import TiffFile +from ..trajectories import Trajectories +from ..tracker.solver import ByFrameSolver +from ..io import TiffFile from ..tracking import Tracker +log = logging.getLogger(__name__) + class BeginMitosisTracker(Tracker): diff --git a/spindle_tracker/tracking/cen2_tracker.py b/spindle_tracker/tracking/cen2_tracker.py index 4d34258..96fafe1 100644 --- a/spindle_tracker/tracking/cen2_tracker.py +++ b/spindle_tracker/tracking/cen2_tracker.py @@ -7,15 +7,15 @@ import scipy.spatial.distance as dist import scipy.cluster.hierarchy as hier -log = logging.getLogger(__name__) - -from sktracker.tracker.solver import ByFrameSolver -from sktracker.utils import progress_apply +from ..tracker.solver import ByFrameSolver +from ..utils.progress import progress_apply from ..trajectories import Trajectories from ..io.trackmate import trackmate_peak_import from ..tracking import Tracker +log = logging.getLogger(__name__) + class Cen2Tracker(Tracker): diff --git a/spindle_tracker/tracking/ndc80_tracker.py b/spindle_tracker/tracking/ndc80_tracker.py index 36ceccc..ad2a696 100644 --- a/spindle_tracker/tracking/ndc80_tracker.py +++ b/spindle_tracker/tracking/ndc80_tracker.py @@ -1,6 +1,5 @@ import itertools import logging -log = logging.getLogger(__name__) import numpy as np import pandas as pd @@ -9,11 +8,13 @@ from ..trajectories import Trajectories from ..utils import print_progress -from sktracker.tracker.solver import ByFrameSolver -from sktracker.tracker.solver import GapCloseSolver +from ..tracker.solver import ByFrameSolver +from ..tracker.solver import GapCloseSolver from ..tracking import Tracker +log = logging.getLogger(__name__) + class Ndc80Tracker(Tracker): diff --git a/spindle_tracker/tracking/tracker.py b/spindle_tracker/tracking/tracker.py index af1b626..904a557 100644 --- a/spindle_tracker/tracking/tracker.py +++ b/spindle_tracker/tracking/tracker.py @@ -6,7 +6,7 @@ import pandas as pd import numpy as np -from sktracker.detection import peak_detector +from ..detector import peak_detector from ..trajectories import Trajectories diff --git a/spindle_tracker/trajectories/measures/transformation.py b/spindle_tracker/trajectories/measures/transformation.py index de794c1..b17e16a 100644 --- a/spindle_tracker/trajectories/measures/transformation.py +++ b/spindle_tracker/trajectories/measures/transformation.py @@ -247,7 +247,7 @@ def interp_series(series, new_index): -------- >>> import pandas as pd >>> import numpy as np - >>> from sktracker.trajectories.measures.transformation import interp_series + >>> from spindle_tracker.trajectories.measures.transformation import interp_series >>> series = pd.Series([0, 10, 20, 40, 50, 60], index=[0, 1, 2, 4, 5, 6]) >>> new_index = np.arange(0.5, 7.5, 1) >>> inter = interp_series(series, new_index) diff --git a/spindle_tracker/trajectories/trajectories.py b/spindle_tracker/trajectories/trajectories.py index eb9ebe9..3a8799a 100644 --- a/spindle_tracker/trajectories/trajectories.py +++ b/spindle_tracker/trajectories/trajectories.py @@ -53,7 +53,7 @@ class Trajectories(pd.DataFrame): Examples -------- - >>> from sktracker import data + >>> from spindle_tracker import data >>> from spindle_tracker.trajectories import Trajectories >>> >>> trajs = data.with_gaps_df() @@ -891,8 +891,8 @@ def show(self, xaxis='t', yaxis='x', Examples -------- - >>> from sktracker import data - >>> from sktracker.tracker.solver import ByFrameSolver + >>> from spindle_tracker import data + >>> from spindle_tracker.tracker.solver import ByFrameSolver >>> import matplotlib.pylab as plt >>> true_trajs = data.brownian_trajectories_generator(p_disapear=0.1) >>> solver = ByFrameSolver.for_brownian_motion(true_trajs, max_speed=2) diff --git a/spindle_tracker/ui/tracker_widget.py b/spindle_tracker/ui/tracker_widget.py index cb9769f..402967e 100644 --- a/spindle_tracker/ui/tracker_widget.py +++ b/spindle_tracker/ui/tracker_widget.py @@ -1,10 +1,11 @@ import logging -log = logging.getLogger(__name__) from pyqtgraph.Qt import QtGui from pyqtgraph.Qt import QtCore -from sktracker.ui import TrajectoriesWidget +from .trajectories_widget import TrajectoriesWidget + +log = logging.getLogger(__name__) class TrackersWidget(QtGui.QWidget): diff --git a/spindle_tracker/ui/trajectories_widget.py b/spindle_tracker/ui/trajectories_widget.py index 91a666a..fbaabca 100644 --- a/spindle_tracker/ui/trajectories_widget.py +++ b/spindle_tracker/ui/trajectories_widget.py @@ -1,12 +1,4 @@ -# -*- coding: utf-8 -*- - -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function - import logging -log = logging.getLogger(__name__) import pyqtgraph as pg import numpy as np @@ -18,6 +10,8 @@ from .viewbox import DataSelectorViewBox +log = logging.getLogger(__name__) + class TrajectoriesWidget(QtGui.QWidget): """Display a Trajectories object. Trajectories can be modified manually inside the widget: @@ -26,7 +20,7 @@ class TrajectoriesWidget(QtGui.QWidget): Parameters ---------- - trajs : :class:`sktracker.trajectories.Trajectories` or list + trajs : :class:`spindle_tracker.trajectories.Trajectories` or list Trajectories or list of Trajectories to show in the widget. names : str or list Name(s) of the Trajectories (optional). @@ -46,9 +40,9 @@ class TrajectoriesWidget(QtGui.QWidget): Examples -------- - >>> from sktracker.ui import TrajectoriesWidget - >>> from sktracker import data - >>> from sktracker.trajectories import Trajectories + >>> from spindle_tracker.ui import TrajectoriesWidget + >>> from spindle_tracker import data + >>> from spindle_tracker.trajectories import Trajectories >>> trajs = data.brownian_trajs_df() >>> trajs = Trajectories(trajs) >>> # Relabel to true label diff --git a/spindle_tracker/utils/progress.py b/spindle_tracker/utils/progress.py index 0ab7e20..479710b 100644 --- a/spindle_tracker/utils/progress.py +++ b/spindle_tracker/utils/progress.py @@ -1,17 +1,5 @@ - -# -*- coding: utf-8 -*- - - -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function - - import sys -__all__ = [] - def print_progress(progress, message=None, out=None): """Display a progress bar filled with a variable value. print_progress @@ -34,7 +22,7 @@ def print_progress(progress, message=None, out=None): Examples -------- >>> import time - >>> from sktracker.utils import print_progress + >>> from spindle_tracker.utils import print_progress >>> n = 5 >>> for i in range(n): @@ -93,7 +81,7 @@ def progress_apply(g, func, out=None, *args, **kwargs): -------- >>> import pandas as pd >>> import numpy as np - >>> from sktracker.utils import progress_apply + >>> from spindle_tracker.utils import progress_apply >>> df = pd.DataFrame(np.random.choice(range(100), (1000000, 4)), columns=['A', 'B', 'C', 'D']) >>> gp = df.groupby('A') diff --git a/spindle_tracker/utils/sort.py b/spindle_tracker/utils/sort.py new file mode 100644 index 0000000..75c9bc7 --- /dev/null +++ b/spindle_tracker/utils/sort.py @@ -0,0 +1,24 @@ +import re + +__all__ = ["natural_keys"] + + +def _atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + """Sort list of string in a human way. + + See: http://stackoverflow.com/questions/5967500/how-to-correctly-sort-a-string-with-a-number- + inside + + Examples + -------- + >>> alist=["something1", "something12", "something17", "something2"] + >>> alist.sort(key=natural_keys) + >>> print(alist) + ['something1', 'something2', 'something12', 'something17'] + + """ + return [_atoi(c) for c in re.split('(\d+)', text)]