From 864669208c2ee030038566932376c98c348ecadb Mon Sep 17 00:00:00 2001 From: Marco Cuturi Date: Thu, 20 Oct 2022 00:14:15 +0200 Subject: [PATCH] Euclidean / SqEuclidean and changes in power (#157) * Euclidean / SqEuclidean and changes in power * fix, following expose cost_fn in PointCloud * fix power=2 that were still there * fix * fix in plot, *needed* to update lines in anims. --- README.md | 2 +- docs/geometry.rst | 1 + docs/notebooks/Sinkhorn_Barycenters.ipynb | 2 +- docs/notebooks/introduction_grid.ipynb | 6 +- docs/notebooks/point_clouds.ipynb | 8 ++- ott/core/bar_problems.py | 8 +-- ott/core/potentials.py | 2 +- ott/geometry/costs.py | 11 +++- ott/geometry/grid.py | 2 +- ott/geometry/pointcloud.py | 66 +++++++++---------- ott/tools/k_means.py | 4 +- ott/tools/plot.py | 8 +-- ott/tools/segment_sinkhorn.py | 2 +- ott/tools/sinkhorn_divergence.py | 2 +- tests/core/sinkhorn_diff_test.py | 53 ++++++++------- tests/core/sinkhorn_test.py | 4 +- .../geometry_pointcloud_apply_test.py | 6 +- tests/tools/k_means_test.py | 2 +- 18 files changed, 105 insertions(+), 84 deletions(-) diff --git a/README.md b/README.md index 99b92c720..a3aa1ea17 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ Currently implements the following classes and functions: - The [geometry](ott/geometry) folder describes tools that to encapsulate the essential ingredients of OT problems: measures and cost functions. - - The `CostFn` class in [costs.py](ott/geometry/costs.py) and its descendants define cost functions between points. A few simple costs are considered, `Euclidean` between vectors, and `Bures`, between a pair of mean vector and covariance (p.d.) matrix. + - The `CostFn` class in [costs.py](ott/geometry/costs.py) and its descendants define cost functions between points. A few simple costs are considered, `SqEuclidean` between vectors, and `Bures`, between a pair of mean vector and covariance (p.d.) matrix. - The `Geometry` class in [geometry.py](ott/geometry/geometry.py) describes a cost structure between two measures. That cost structure is accessed through various member functions, either used when running the Sinkhorn algorithm (typically kernel multiplications, or log-sum-exp row/column-wise application) or after (to apply the OT matrix to a vector). diff --git a/docs/geometry.rst b/docs/geometry.rst index e13f9c885..c2c438f0c 100644 --- a/docs/geometry.rst +++ b/docs/geometry.rst @@ -50,6 +50,7 @@ Cost Functions :toctree: _autosummary costs.CostFn + costs.SqEuclidean costs.Euclidean costs.Cosine costs.Bures diff --git a/docs/notebooks/Sinkhorn_Barycenters.ipynb b/docs/notebooks/Sinkhorn_Barycenters.ipynb index febfce418..db3116b76 100644 --- a/docs/notebooks/Sinkhorn_Barycenters.ipynb +++ b/docs/notebooks/Sinkhorn_Barycenters.ipynb @@ -425,7 +425,7 @@ "id": "tqg86SFQvzXC" }, "source": [ - "### Euclidean barycenter, for reference" + "### SqEuclidean barycenter, for reference" ] }, { diff --git a/docs/notebooks/introduction_grid.ipynb b/docs/notebooks/introduction_grid.ipynb index 8769d1f08..402041b86 100644 --- a/docs/notebooks/introduction_grid.ipynb +++ b/docs/notebooks/introduction_grid.ipynb @@ -309,8 +309,8 @@ "outputs": [], "source": [ "@jax.tree_util.register_pytree_node_class\n", - "class EuclideanTimes2(costs.CostFn):\n", - " \"\"\"The cost function corresponding to the squared euclidean distance times 2.\"\"\"\n", + "class SqEuclideanTimes2(costs.CostFn):\n", + " \"\"\"The cost function corresponding to the squared SqEuclidean distance times 2.\"\"\"\n", "\n", " def norm(self, x):\n", " return jnp.sum(x**2, axis=-1) * 2\n", @@ -319,7 +319,7 @@ " return -2 * jnp.sum(x * y) * 2\n", "\n", "\n", - "cost_fns = [EuclideanTimes2(), costs.Euclidean()]" + "cost_fns = [SqEuclideanTimes2(), costs.SqEuclidean()]" ] }, { diff --git a/docs/notebooks/point_clouds.ipynb b/docs/notebooks/point_clouds.ipynb index 596d41f29..530917f69 100644 --- a/docs/notebooks/point_clouds.ipynb +++ b/docs/notebooks/point_clouds.ipynb @@ -85,7 +85,7 @@ "source": [ "## Computes the regularized optimal transport\n", "\n", - "To compute the transport matrix between the two point clouds, one can define a `PointCloud` geometry (which by default uses `ott.geometry.costs.Euclidean` for cost function), then call the `sinkhorn` function, and build the transport matrix from the optimized potentials." + "To compute the transport matrix between the two point clouds, one can define a `PointCloud` geometry (which by default uses `ott.geometry.costs.SqEuclidean` for cost function), then call the `sinkhorn` function, and build the transport matrix from the optimized potentials." ] }, { @@ -235,7 +235,7 @@ " y: jnp.ndarray,\n", " a: jnp.ndarray,\n", " b: jnp.ndarray,\n", - " cost_fn=ott.geometry.costs.Euclidean(),\n", + " cost_fn=ott.geometry.costs.SqEuclidean(),\n", " num_iter: int = 101,\n", " dump_every: int = 10,\n", " learning_rate: float = 0.2,\n", @@ -5972,7 +5972,9 @@ "source": [ "from IPython import display\n", "\n", - "ots = optimize(x, y, a, b, num_iter=100, cost_fn=ott.geometry.costs.Euclidean())\n", + "ots = optimize(\n", + " x, y, a, b, num_iter=100, cost_fn=ott.geometry.costs.SqEuclidean()\n", + ")\n", "fig = plt.figure(figsize=(8, 5))\n", "plott = ott.tools.plot.Plot(fig=fig)\n", "anim = plott.animate(ots, frame_rate=4)\n", diff --git a/ott/core/bar_problems.py b/ott/core/bar_problems.py index ee214ca6d..68c29f325 100644 --- a/ott/core/bar_problems.py +++ b/ott/core/bar_problems.py @@ -42,7 +42,7 @@ class BarycenterProblem: weights: Array of shape ``[num_measures,]`` containing the weights of the measures. cost_fn: Cost function used. If `None`, - use :class:`~ott.geometry.costs.Euclidean` cost. + use :class:`~ott.geometry.costs.SqEuclidean` cost. epsilon: Epsilon regularization used to solve reg-OT problems. debiased: **Currently not implemented.** Whether the problem is debiased, in the sense that @@ -75,7 +75,7 @@ def __init__( raise ValueError("Specify weights if `y` is already segmented.") self._b = b self._weights = weights - self.cost_fn = costs.Euclidean() if cost_fn is None else cost_fn + self.cost_fn = costs.SqEuclidean() if cost_fn is None else cost_fn self.epsilon = epsilon self.debiased = debiased self._kwargs = kwargs @@ -309,7 +309,7 @@ def update_features(self, transports: jnp.ndarray, """Update the barycenter features in the fused case :cite:`vayer:19`. Uses :cite:`cuturi:14` eq. 8, and is implemented only - for the squared :class:`~ott.geometry.costs.Euclidean` cost. + for the squared :class:`~ott.geometry.costs.SqEuclidean` cost. Args: transports: Transport maps of shape @@ -330,7 +330,7 @@ def update_features(self, transports: jnp.ndarray, transports = transports * inv_a[None, :, None] if self._loss_name == "sqeucl": - cost = costs.Euclidean() + cost = costs.SqEuclidean() return jnp.sum( weights * barycentric_projection(transports, y_fused, cost), axis=0 ) diff --git a/ott/core/potentials.py b/ott/core/potentials.py index 5a1beb42e..0170825cc 100644 --- a/ott/core/potentials.py +++ b/ott/core/potentials.py @@ -162,7 +162,7 @@ def callback(x: jnp.ndarray) -> float: cost = pointcloud.PointCloud( jnp.atleast_2d(x), y, - cost_fn=self._geom._cost_fn, + cost_fn=self._geom.cost_fn, power=self._geom.power, epsilon=1.0 # epsilon is not used ).cost_matrix diff --git a/ott/geometry/costs.py b/ott/geometry/costs.py index f98c573ec..fae64a596 100644 --- a/ott/geometry/costs.py +++ b/ott/geometry/costs.py @@ -92,7 +92,16 @@ def tree_unflatten(cls, aux_data, children): @jax.tree_util.register_pytree_node_class class Euclidean(CostFn): - """Squared Euclidean distance CostFn.""" + """Euclidean distance.""" + + def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float: + """Compute Euclidean norm.""" + return jnp.linalg.norm(x - y) + + +@jax.tree_util.register_pytree_node_class +class SqEuclidean(CostFn): + """Squared Euclidean distance.""" def norm(self, x: jnp.ndarray) -> Union[float, jnp.ndarray]: """Compute squared Euclidean norm for vector.""" diff --git a/ott/geometry/grid.py b/ott/geometry/grid.py index fdd9f346a..25e9809eb 100644 --- a/ott/geometry/grid.py +++ b/ott/geometry/grid.py @@ -101,7 +101,7 @@ def __init__( raise ValueError('Input either grid_size t-uple or grid locations x.') if cost_fns is None: - cost_fns = [costs.Euclidean()] + cost_fns = [costs.SqEuclidean()] self.cost_fns = cost_fns self.kwargs = { 'num_a': self.num_a, diff --git a/ott/geometry/pointcloud.py b/ott/geometry/pointcloud.py index 64c10482b..9b518090e 100644 --- a/ott/geometry/pointcloud.py +++ b/ott/geometry/pointcloud.py @@ -65,7 +65,7 @@ def __init__( x: jnp.ndarray, y: Optional[jnp.ndarray] = None, cost_fn: Optional[costs.CostFn] = None, - power: float = 2.0, + power: float = 1.0, batch_size: Optional[int] = None, scale_cost: Union[bool, int, float, Literal['mean', 'max_norm', 'max_bound', 'max_cost', @@ -75,9 +75,9 @@ def __init__( super().__init__(**kwargs) self.x = x self.y = self.x if y is None else y - self._cost_fn = costs.Euclidean() if cost_fn is None else cost_fn + self.cost_fn = costs.SqEuclidean() if cost_fn is None else cost_fn self.power = power - self._axis_norm = 0 if callable(self._cost_fn.norm) else None + self._axis_norm = 0 if callable(self.cost_fn.norm) else None if batch_size is not None: assert batch_size > 0, f"`batch_size={batch_size}` must be positive." self._batch_size = batch_size @@ -86,13 +86,13 @@ def __init__( @property def _norm_x(self) -> Union[float, jnp.ndarray]: if self._axis_norm == 0: - return self._cost_fn.norm(self.x) + return self.cost_fn.norm(self.x) return 0. @property def _norm_y(self) -> Union[float, jnp.ndarray]: if self._axis_norm == 0: - return self._cost_fn.norm(self.y) + return self.cost_fn.norm(self.y) return 0. @property @@ -125,7 +125,7 @@ def is_symmetric(self) -> bool: @property def is_squared_euclidean(self) -> bool: - return isinstance(self._cost_fn, costs.Euclidean) and self.power == 2.0 + return isinstance(self.cost_fn, costs.SqEuclidean) and self.power == 1.0 @property def is_online(self) -> bool: @@ -163,7 +163,7 @@ def inv_scale_cost(self) -> float: "the cost matrix with the online mode is not implemented." ) if self._scale_cost == 'max_norm': - if self._cost_fn.norm is not None: + if self.cost_fn.norm is not None: return 1.0 / jnp.maximum(self._norm_x.max(), self._norm_y.max()) return 1.0 if self._scale_cost == 'max_bound': @@ -183,11 +183,11 @@ def inv_scale_cost(self) -> float: raise ValueError(f'Scaling {self._scale_cost} not implemented.') def _compute_cost_matrix(self) -> jnp.ndarray: - cost_matrix = self._cost_fn.all_pairs_pairwise(self.x, self.y) + cost_matrix = self.cost_fn.all_pairs_pairwise(self.x, self.y) if self._axis_norm is not None: cost_matrix += self._norm_x[:, jnp.newaxis] + self._norm_y[jnp.newaxis, :] - if self.power != 2.0: - cost_matrix = jnp.abs(cost_matrix) ** (0.5 * self.power) + if self.power != 1.0: + cost_matrix = jnp.abs(cost_matrix) ** self.power return cost_matrix def apply_lse_kernel( @@ -212,7 +212,7 @@ def body0(carry, i: int): self._norm_y, (i * self.batch_size,), (self.batch_size,) ) h_res, h_sgn = app( - self.x, y, self._norm_x, norm_y, f, g_, eps, vec, self._cost_fn, + self.x, y, self._norm_x, norm_y, f, g_, eps, vec, self.cost_fn, self.power, self.inv_scale_cost ) return carry, (h_res, h_sgn) @@ -230,7 +230,7 @@ def body1(carry, i: int): self._norm_x, (i * self.batch_size,), (self.batch_size,) ) h_res, h_sgn = app( - self.y, x, self._norm_y, norm_x, g, f_, eps, vec, self._cost_fn, + self.y, x, self._norm_y, norm_x, g, f_, eps, vec, self.cost_fn, self.power, self.inv_scale_cost ) return carry, (h_res, h_sgn) @@ -240,12 +240,12 @@ def finalize(i: int): norm_y = self._norm_y if self._axis_norm is None else self._norm_y[i:] return app( self.x, self.y[i:], self._norm_x, norm_y, f, g[i:], eps, vec, - self._cost_fn, self.power, self.inv_scale_cost + self.cost_fn, self.power, self.inv_scale_cost ) norm_x = self._norm_x if self._axis_norm is None else self._norm_x[i:] return app( self.y, self.x[i:], self._norm_y, norm_x, g, f[i:], eps, vec, - self._cost_fn, self.power, self.inv_scale_cost + self.cost_fn, self.power, self.inv_scale_cost ) if not self.is_online: @@ -297,12 +297,12 @@ def apply_kernel( if axis == 0: return app( self.x, self.y, self._norm_x, self._norm_y, scaling, eps, - self._cost_fn, self.power, self.inv_scale_cost + self.cost_fn, self.power, self.inv_scale_cost ) if axis == 1: return app( self.y, self.x, self._norm_y, self._norm_x, scaling, eps, - self._cost_fn, self.power, self.inv_scale_cost + self.cost_fn, self.power, self.inv_scale_cost ) def transport_from_potentials( @@ -318,7 +318,7 @@ def transport_from_potentials( ) return transport( self.y, self.x, self._norm_y, self._norm_x, g, f, self.epsilon, - self._cost_fn, self.power, self.inv_scale_cost + self.cost_fn, self.power, self.inv_scale_cost ) def transport_from_scalings( @@ -334,7 +334,7 @@ def transport_from_scalings( ) return transport( self.y, self.x, self._norm_y, self._norm_x, v, u, self.epsilon, - self._cost_fn, self.power, self.inv_scale_cost + self.cost_fn, self.power, self.inv_scale_cost ) def apply_cost( @@ -387,12 +387,12 @@ def _apply_cost( arr = arr.reshape(-1, 1) if axis == 0: return app( - self.x, self.y, self._norm_x, self._norm_y, arr, self._cost_fn, + self.x, self.y, self._norm_x, self._norm_y, arr, self.cost_fn, self.power, self.inv_scale_cost, fn ) if axis == 1: return app( - self.y, self.x, self._norm_y, self._norm_x, arr, self._cost_fn, + self.y, self.x, self._norm_y, self._norm_x, arr, self.cost_fn, self.power, self.inv_scale_cost, fn ) else: @@ -464,7 +464,7 @@ def body0(carry, i: int): else: norm_y = self._leading_slice(self._norm_y, i) h_res = app( - self.x, y, self._norm_x, norm_y, vec, self._cost_fn, self.power, + self.x, y, self._norm_x, norm_y, vec, self.cost_fn, self.power, scale_cost ) return carry, h_res @@ -477,7 +477,7 @@ def body1(carry, i: int): else: norm_x = self._leading_slice(self._norm_x, i) h_res = app( - self.y, x, self._norm_y, norm_x, vec, self._cost_fn, self.power, + self.y, x, self._norm_y, norm_x, vec, self.cost_fn, self.power, scale_cost ) return carry, h_res @@ -486,12 +486,12 @@ def finalize(i: int): if batch_for_y: norm_y = self._norm_y if self._axis_norm is None else self._norm_y[i:] return app( - self.x, self.y[i:], self._norm_x, norm_y, vec, self._cost_fn, + self.x, self.y[i:], self._norm_x, norm_y, vec, self.cost_fn, self.power, scale_cost ) norm_x = self._norm_x if self._axis_norm is None else self._norm_x[i:] return app( - self.y, self.x[i:], self._norm_y, norm_x, vec, self._cost_fn, + self.y, self.x[i:], self._norm_y, norm_x, vec, self.cost_fn, self.power, scale_cost ) @@ -532,9 +532,9 @@ def finalize(i: int): ) def barycenter(self, weights: jnp.ndarray) -> jnp.ndarray: - """Compute barycenter of points in self.x using weights, valid for p=2.0.""" - assert self.power == 2.0, self.power - return self._cost_fn.barycenter(self.x, weights) + """Compute barycenter of points in self.x using weights, valid for p=1.0.""" + assert self.power == 1.0, self.power + return self.cost_fn.barycenter(self.x, weights) @classmethod def prepare_divergences( @@ -560,7 +560,7 @@ def prepare_divergences( def tree_flatten(self): # passing self.power in aux_data to be able to condition on it. - return ([self.x, self.y, self._src_mask, self._tgt_mask, self._cost_fn], { + return ([self.x, self.y, self._src_mask, self._tgt_mask, self.cost_fn], { 'epsilon': self._epsilon_init, 'relative_epsilon': self._relative_epsilon, 'scale_epsilon': self._scale_epsilon, @@ -577,14 +577,14 @@ def tree_unflatten(cls, aux_data, children): ) def _cosine_to_sqeucl(self) -> 'PointCloud': - assert isinstance(self._cost_fn, costs.Cosine), type(self._cost_fn) - assert self.power == 2, self.power + assert isinstance(self.cost_fn, costs.Cosine), type(self.cost_fn) + assert self.power == 1.0, self.power (x, y, *args, _), aux_data = self.tree_flatten() x = x / jnp.linalg.norm(x, axis=-1, keepdims=True) y = y / jnp.linalg.norm(y, axis=-1, keepdims=True) # TODO(michalk8): find a better way aux_data["scale_cost"] = 2. / self.inv_scale_cost - cost_fn = costs.Euclidean() + cost_fn = costs.SqEuclidean() return type(self).tree_unflatten(aux_data, [x, y] + args + [cost_fn]) def to_LRCGeometry( @@ -767,8 +767,8 @@ def _transport_from_scalings_xy( def _cost(x, y, norm_x, norm_y, cost_fn, cost_pow, scale_cost): one_line_pairwise = jax.vmap(cost_fn.pairwise, in_axes=[0, None]) cost = norm_x + norm_y + one_line_pairwise(x, y) - if cost_pow != 2.0: - cost = jnp.abs(cost) ** (0.5 * cost_pow) + if cost_pow != 1.0: + cost = jnp.abs(cost) ** cost_pow return cost * scale_cost diff --git a/ott/tools/k_means.py b/ott/tools/k_means.py index 844e1f4cf..335d16dc8 100644 --- a/ott/tools/k_means.py +++ b/ott/tools/k_means.py @@ -358,7 +358,7 @@ def k_means( Args: geom: Point cloud of shape ``[n, ndim]`` to cluster. If passed as an array, - :class:`~ott.geometry.costs.Euclidean` cost is assumed. + :class:`~ott.geometry.costs.SqEuclidean` cost is assumed. k: The number of clusters. weights: The weights of input points. These weights are considered when computing the centroids and inertia. If ``None``, use uniform weights. @@ -388,7 +388,7 @@ def k_means( 0] >= k, f"Cannot cluster `{geom.shape[0]}` points into `{k}` clusters." if isinstance(geom, jnp.ndarray): geom = pointcloud.PointCloud(geom) - if isinstance(geom._cost_fn, costs.Cosine): + if isinstance(geom.cost_fn, costs.Cosine): geom = geom._cosine_to_sqeucl() assert geom.is_squared_euclidean diff --git a/ott/tools/plot.py b/ott/tools/plot.py index e8183e8f1..fec2e4037 100644 --- a/ott/tools/plot.py +++ b/ott/tools/plot.py @@ -43,15 +43,15 @@ class Plot: possibilities to create animations as matplotlib.animation.FuncAnimation, which can in turned be saved to disk at will. There are two design principles here: 1) we do not rely on saving to/loading from disk to create animations - 2) we try as much as possible to disentangle the transport problem(s) from the - its visualization(s), leveraging the transport.Transport interface. + 2) we try as much as possible to disentangle the transport problem(s) from + its visualization(s). """ def __init__( self, fig: Optional[plt.Figure] = None, ax: Optional[plt.Axes] = None, - cost_threshold: float = 0.0, + cost_threshold: float = -1.0, # should be negative for animations. scale: int = 200, show_lines: bool = True, cmap: str = 'cool' @@ -167,7 +167,7 @@ def animate( return animation.FuncAnimation( self.fig, lambda i: self.update(transports[i]), - np.arange(1, len(transports)), + np.arange(0, len(transports)), init_func=lambda: self.update(transports[0]), interval=1000 / frame_rate, blit=True diff --git a/ott/tools/segment_sinkhorn.py b/ott/tools/segment_sinkhorn.py index 690bf74e8..b770e06be 100644 --- a/ott/tools/segment_sinkhorn.py +++ b/ott/tools/segment_sinkhorn.py @@ -65,7 +65,7 @@ def segment_sinkhorn( max_measure_size: Total size of measures after padding. Should ideally be set to an upper bound on points clouds processed with the segment interface. Providing this number is required for JIT compilation to work. - cost_fn: Cost function, defaults to :class:`~ott.core.costs.Euclidean`. + cost_fn: Cost function, defaults to :class:`~ott.core.costs.SqEuclidean`. segment_ids_x: **1st interface** The segment ID for which each row of x belongs. This is a similar interface to `jax.ops.segment_sum`. segment_ids_y: **1st interface** The segment ID for which each row of y diff --git a/ott/tools/sinkhorn_divergence.py b/ott/tools/sinkhorn_divergence.py index b3564622d..faa3e1ba0 100644 --- a/ott/tools/sinkhorn_divergence.py +++ b/ott/tools/sinkhorn_divergence.py @@ -228,7 +228,7 @@ def segment_sinkhorn_divergence( set to an upper bound on points clouds processed with the segment interface. Should also be smaller than total length of `x` or `y`. Providing this number is required for JIT compilation to work. - cost_fn: Cost function, defaults to :class:`~ott.core.costs.Euclidean`. + cost_fn: Cost function, defaults to :class:`~ott.core.costs.SqEuclidean`. segment_ids_x: **1st interface** The segment ID for which each row of x belongs. This is a similar interface to :func:`jax.ops.segment_sum`. segment_ids_y: **1st interface** The segment ID for which each row of y diff --git a/tests/core/sinkhorn_diff_test.py b/tests/core/sinkhorn_diff_test.py index f8ab740b1..7e60a8309 100644 --- a/tests/core/sinkhorn_diff_test.py +++ b/tests/core/sinkhorn_diff_test.py @@ -24,7 +24,7 @@ from ott.core import implicit_differentiation as implicit_lib from ott.core import linear_problems, sinkhorn -from ott.geometry import geometry, grid, pointcloud +from ott.geometry import costs, geometry, grid, pointcloud from ott.tools import transport @@ -232,13 +232,13 @@ def loss_fn(cm): np.testing.assert_array_equal(jnp.isnan(custom_grad), False) @pytest.mark.fast.with_args( - "lse_mode,implicit_differentiation,min_iter,max_iter,epsilon", + "lse_mode,implicit_differentiation,min_iter,max_iter,epsilon,power", [ - (True, True, 0, 2000, 1e-3), - (True, True, 1000, 1000, 1e-3), - (True, False, 1000, 1000, 1e-2), - (True, False, 0, 2000, 1e-2), - (False, True, 0, 2000, 1e-2), + (True, True, 0, 2000, 1e-3, 1.0), + (True, True, 1000, 1000, 1e-3, 1.0), + (True, False, 1000, 1000, 1e-2, 2.0), + (True, False, 0, 2000, 1e-2, 2.0), + (False, True, 0, 2000, 1e-2, 1.0), ], ids=[ "lse-implicit", "lse-implicit-force_scan", "lse-backprop-force_scan", @@ -247,13 +247,8 @@ def loss_fn(cm): only_fast=[0, 1], ) def test_gradient_sinkhorn_euclidean( - self, - rng: jnp.ndarray, - lse_mode: bool, - implicit_differentiation: bool, - min_iter: int, - max_iter: int, - epsilon: float, + self, rng: jnp.ndarray, lse_mode: bool, implicit_differentiation: bool, + min_iter: int, max_iter: int, epsilon: float, power: float ): """Test gradient w.r.t. locations x of reg-ot-cost.""" # TODO(cuturi): ensure scaling mode works with backprop. @@ -270,9 +265,16 @@ def test_gradient_sinkhorn_euclidean( b = b.at[3].set(0) a = a / jnp.sum(a) b = b / jnp.sum(b) + # Adding some near-zero distances to test proper handling with power==1.0 + y = y.at[0].set(x[0, :] + 1e-3) def loss_fn(x, y): - geom = pointcloud.PointCloud(x, y, epsilon=epsilon) + geom = pointcloud.PointCloud( + x, + y, + epsilon=epsilon, + cost_fn=costs.SqEuclidean() if power == 2.0 else costs.Euclidean() + ) out = sinkhorn.sinkhorn( geom, a, @@ -283,7 +285,7 @@ def loss_fn(x, y): max_iterations=max_iter, jit=False ) - return out.reg_ot_cost, (geom, out.f, out.g) + return out.reg_ot_cost, out delta = jax.random.normal(keys[0], (n, d)) delta = delta / jnp.sqrt(jnp.vdot(delta, delta)) @@ -291,17 +293,24 @@ def loss_fn(x, y): # first calculation of gradient loss_and_grad = jax.value_and_grad(loss_fn, has_aux=True) - (loss_value, aux), grad_loss = loss_and_grad(x, y) + (loss_value, out), grad_loss = loss_and_grad(x, y) custom_grad = jnp.sum(delta * grad_loss) assert not jnp.isnan(loss_value) np.testing.assert_array_equal(grad_loss.shape, x.shape) np.testing.assert_array_equal(jnp.isnan(grad_loss), False) - # second calculation of gradient - tm = aux[0].transport_from_potentials(aux[1], aux[2]) - tmp = 2 * tm[:, :, None] * (x[:, None, :] - y[None, :, :]) - grad_x = jnp.sum(tmp, 1) - other_grad = jnp.sum(delta * grad_x) + # second calculation of gradient, only valid for power=2.0 + tm = out.matrix + if power == 2.0: + tmp = 2 * tm[:, :, None] * (x[:, None, :] - y[None, :, :]) + grad_x = jnp.sum(tmp, 1) + other_grad = jnp.sum(delta * grad_x) + if power == 1.0: + tmp = tm[:, :, None] * (x[:, None, :] - y[None, :, :]) + norms = jnp.linalg.norm(x[:, None] - y[None, :], axis=-1) + tmp /= norms[:, :, None] + 1e-8 # to stabilize when computed by hand + grad_x = jnp.sum(tmp, 1) + other_grad = jnp.sum(delta * grad_x) # third calculation of gradient loss_delta_plus, _ = loss_fn(x + eps * delta, y) diff --git a/tests/core/sinkhorn_test.py b/tests/core/sinkhorn_test.py index 580b334d9..ae3d04255 100644 --- a/tests/core/sinkhorn_test.py +++ b/tests/core/sinkhorn_test.py @@ -209,12 +209,12 @@ def test_online_vs_batch_euclidean_point_cloud(self, lse_mode: bool): self.x, self.y, epsilon=eps, batch_size=7 ) online_geom_euc = pointcloud.PointCloud( - self.x, self.y, cost_fn=costs.Euclidean(), epsilon=eps, batch_size=10 + self.x, self.y, cost_fn=costs.SqEuclidean(), epsilon=eps, batch_size=10 ) batch_geom = pointcloud.PointCloud(self.x, self.y, epsilon=eps) batch_geom_euc = pointcloud.PointCloud( - self.x, self.y, cost_fn=costs.Euclidean(), epsilon=eps + self.x, self.y, cost_fn=costs.SqEuclidean(), epsilon=eps ) out_online = sinkhorn.sinkhorn( diff --git a/tests/geometry/geometry_pointcloud_apply_test.py b/tests/geometry/geometry_pointcloud_apply_test.py index bc31770a6..0be595e63 100644 --- a/tests/geometry/geometry_pointcloud_apply_test.py +++ b/tests/geometry/geometry_pointcloud_apply_test.py @@ -37,10 +37,10 @@ def test_apply_cost_and_kernel(self, rng: jnp.ndarray): vec0 = jax.random.normal(keys[2], (n, b)) vec1 = jax.random.normal(keys[3], (m, b)) - geom = pointcloud.PointCloud(x, y, power=2, batch_size=3) + geom = pointcloud.PointCloud(x, y, batch_size=3) prod0_online = geom.apply_cost(vec0, axis=0) prod1_online = geom.apply_cost(vec1, axis=1) - geom = pointcloud.PointCloud(x, y, power=2, batch_size=None) + geom = pointcloud.PointCloud(x, y, batch_size=None) prod0 = geom.apply_cost(vec0, axis=0) prod1 = geom.apply_cost(vec1, axis=1) geom = geometry.Geometry(cost) @@ -105,7 +105,7 @@ def test_apply_cost_without_norm(self, rng: jnp.ndarray, axis: 1): pc = pointcloud.PointCloud(x, y, cost_fn=costs.Cosine()) arr = jnp.ones((pc.shape[0],)) if axis == 0 else jnp.ones((pc.shape[1],)) - assert pc._cost_fn.norm is None + assert pc.cost_fn.norm is None with pytest.raises( AssertionError, match=r"Cost matrix is not a squared Euclidean\." ): diff --git a/tests/tools/k_means_test.py b/tests/tools/k_means_test.py index defcfd2c6..416a4a849 100644 --- a/tests/tools/k_means_test.py +++ b/tests/tools/k_means_test.py @@ -24,7 +24,7 @@ def make_blobs( if cost_fn is None: pass elif cost_fn == 'sqeucl': - X = pointcloud.PointCloud(X, cost_fn=costs.Euclidean()) + X = pointcloud.PointCloud(X, cost_fn=costs.SqEuclidean()) elif cost_fn == 'cosine': X = pointcloud.PointCloud(X, cost_fn=costs.Cosine()) else: