Skip to content

Commit

Permalink
gaussian kernel (#5)
Browse files Browse the repository at this point in the history
* add a function to create sparse arrays from coordinates and weights

* implement a gaussian kernel for healpix cells

* explicitly set the fill value

* expose the main functionality at the top level

* don't export `kernels.common`

* export `gaussian_kernel`

* directly import the used functions

* add a docstring

* add comments about what to improve at a later point

* transform the sum to dense when normalizing

Otherwise, the entire sparse matrix will become dense, which is most
likely too much to fit into memory.

* add `sparse` to the environment

* rename `distances` to `angular_distances`

* import the methods in `neighbours` directly: the module is shadowed

* raise if the cell ids are not 1D

* allow passing the kernel size

* fix the check for squeezable dims

* add tests for the gaussian kernel function

* avoid the normalization of the sparse array

The issue with the sparse normalization is that the fill value would
change to `nan`.
  • Loading branch information
keewis authored May 22, 2024
1 parent 311fd00 commit 79639a7
Show file tree
Hide file tree
Showing 9 changed files with 216 additions and 6 deletions.
1 change: 1 addition & 0 deletions ci/requirements/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ dependencies:
- healpy
- geopandas
- pandas
- sparse
- xarray
- folium
- shapely
4 changes: 4 additions & 0 deletions healpix_convolution/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from importlib.metadata import version

from healpix_convolution import kernels # noqa: F401
from healpix_convolution.distances import angular_distances # noqa: F401
from healpix_convolution.neighbours import neighbours # noqa: F401

try:
__version__ = version("healpix_convolution")
except Exception:
Expand Down
2 changes: 1 addition & 1 deletion healpix_convolution/distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def _distances(a, b, axis, nside, nest):
return np.arctan2(cross_product, dot_product)


def distances(neighbours, *, resolution, indexing_scheme="nested", axis=None):
def angular_distances(neighbours, *, resolution, indexing_scheme="nested", axis=None):
"""compute the angular great-circle distances between neighbours
Parameters
Expand Down
1 change: 1 addition & 0 deletions healpix_convolution/kernels/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from healpix_convolution.kernels.gaussian import gaussian_kernel # noqa: F401
19 changes: 19 additions & 0 deletions healpix_convolution/kernels/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import numpy as np
import sparse


def create_sparse(cell_ids, neighbours, weights, shape):
neighbours_ = np.reshape(neighbours, -1)
mask = neighbours_ != -1

n_neighbours = neighbours.shape[-1]
cell_ids_ = np.reshape(
np.repeat(cell_ids[..., None], repeats=n_neighbours, axis=-1), -1
)

coords = np.reshape(np.stack([cell_ids_, neighbours_], axis=0), (2, -1))

weights_ = np.reshape(weights, -1)[mask]
coords_ = coords[..., mask]

return sparse.COO(coords=coords_, data=weights_, shape=shape, fill_value=0)
71 changes: 71 additions & 0 deletions healpix_convolution/kernels/gaussian.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import healpy as hp
import numpy as np

from healpix_convolution.distances import angular_distances
from healpix_convolution.kernels.common import create_sparse
from healpix_convolution.neighbours import neighbours


def gaussian_kernel(
cell_ids,
*,
resolution: int,
indexing_scheme: str,
sigma: float,
truncate: float = 4.0,
kernel_size: int | None = None,
):
"""construct a gaussian kernel on the healpix grid
Parameters
----------
cell_ids : array-like
The cell ids.
resolution : int
The healpix resolution
indexing_scheme : {"nested", "ring"}
The healpix indexing scheme
sigma : float
The standard deviation of the gaussian kernel
truncate : float, default: 4.0
Truncate the kernel after this many multiples of ``sigma``.
kernel_size : int, optional
If given, determines the size of the kernel. In that case, ``truncate`` is ignored.
Returns
-------
kernel : sparse.COO
The constructed kernel
Notes
-----
no dask support, yet
"""
if cell_ids.ndim != 1 or len([s for s in cell_ids.shape if s != 1]) != 1:
raise ValueError(
f"cell ids must be 1-dimensional, but shape is: {cell_ids.shape}"
)

cell_ids = np.reshape(cell_ids, -1)

# TODO: figure out whether there is a better way of defining the units of `sigma`
if kernel_size is not None:
ring = int(kernel_size / 2)
else:
cell_distance = hp.nside2resol(2**resolution, arcmin=False)
ring = int((truncate * sigma / cell_distance) // 2)

nb = neighbours(
cell_ids, resolution=resolution, indexing_scheme=indexing_scheme, ring=ring
)
d = angular_distances(nb, resolution=resolution, indexing_scheme=indexing_scheme)

sigma2 = sigma * sigma
phi_x = np.exp(-0.5 / sigma2 * d**2)
masked = np.where(nb == -1, 0, phi_x)
normalized = masked / np.sum(masked, axis=1, keepdims=True)

# TODO (keewis): figure out a way to translate global healpix indices to local ones
# The kernel should still work for a subset of the full map.
shape = (12 * 4**resolution, 12 * 4**resolution)
return create_sparse(cell_ids, nb, normalized, shape=shape)
2 changes: 1 addition & 1 deletion healpix_convolution/tests/test_distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@
),
)
def test_distances_numpy(neighbours, expected):
actual = hds.distances(neighbours, resolution=2, indexing_scheme="nested")
actual = hds.angular_distances(neighbours, resolution=2, indexing_scheme="nested")

np.testing.assert_allclose(actual, expected)
114 changes: 114 additions & 0 deletions healpix_convolution/tests/test_kernels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import numpy as np
import pytest

from healpix_convolution import kernels


@pytest.mark.parametrize(
["cell_ids", "neighbours", "weights"],
(
pytest.param(
np.array([0, 1]),
np.array(
[[0, 17, 19, 2, 3, 1, 23, 22, 35], [3, 2, 13, 15, 11, 7, 6, 1, 0]]
),
np.full((18,), fill_value=1 / 18),
),
pytest.param(
np.array([3, 2]),
np.array(
[[3, 2, 13, 15, 11, 7, 6, 1, 0], [2, 19, -1, 13, 15, 3, 1, 0, 17]]
),
np.full((18,), fill_value=1 / 17),
),
),
)
def test_create_sparse(cell_ids, neighbours, weights):
actual = kernels.common.create_sparse(cell_ids, neighbours, weights, shape=(48, 48))

nnz = np.sum(neighbours != -1, axis=1)
value = nnz * weights[0]

assert hasattr(actual, "nnz"), "not a sparse matrix"
assert np.allclose(
np.sum(actual[cell_ids, :], axis=1).todense(), value
), "rows have unexpected values"
assert actual.size == 48**2


class TestGaussian:
@pytest.mark.parametrize(
["cell_ids", "kwargs"],
(
(
np.array([1, 2]),
{"resolution": 1, "indexing_scheme": "nested", "sigma": 0.1},
),
(
np.array([1, 2]),
{"resolution": 1, "indexing_scheme": "ring", "sigma": 0.1},
),
(
np.array([0, 2]),
{"resolution": 1, "indexing_scheme": "nested", "sigma": 0.2},
),
(
np.array([1, 2]),
{
"resolution": 1,
"indexing_scheme": "nested",
"sigma": 0.1,
"kernel_size": 5,
},
),
(
np.array([3, 0]),
{
"resolution": 1,
"indexing_scheme": "ring",
"sigma": 0.1,
"kernel_size": 3,
},
),
),
)
def test_gaussian_kernel(self, cell_ids, kwargs):
actual = kernels.gaussian_kernel(cell_ids, **kwargs)

kernel_sum = np.sum(actual, axis=1)

assert np.sum(np.isnan(actual)) == 0
np.testing.assert_allclose(kernel_sum[cell_ids].todense(), 1)

# try determining the sigma from the values for better tests

@pytest.mark.parametrize(
["cell_ids", "kwargs", "error", "pattern"],
(
(
np.array([[0, 1], [2, 3]]),
{
"resolution": 1,
"indexing_scheme": "nested",
"sigma": 0.1,
"kernel_size": 3,
},
ValueError,
"1-dimensional",
),
(
np.array([0, 1]),
{
"resolution": 1,
"indexing_scheme": "nested",
"sigma": 0.1,
"kernel_size": 7,
},
ValueError,
"more than the neighbouring base pixels",
),
),
)
def test_gaussian_kernel_errors(self, cell_ids, kwargs, error, pattern):
with pytest.raises(error, match=pattern):
kernels.gaussian_kernel(cell_ids, **kwargs)
8 changes: 4 additions & 4 deletions healpix_convolution/tests/test_neighbours.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest
from hypothesis import given

import healpix_convolution.neighbours as nb
from healpix_convolution.neighbours import generate_offsets, neighbours

try:
import dask.array as da
Expand All @@ -22,7 +22,7 @@ def test_generate_offsets(ring):
kernel_size = 2 * ring + 1
kernel = np.zeros(shape=(kernel_size, kernel_size))

for x, y in nb.generate_offsets(ring):
for x, y in generate_offsets(ring):
kernel[x + ring, y + ring] = 1

assert np.sum(kernel) == kernel_size**2
Expand All @@ -39,7 +39,7 @@ def test_neighbours_ring1_manual(resolution, indexing_scheme, dask):

cell_ids = xp.arange(12 * 4**resolution)

actual = nb.neighbours(
actual = neighbours(
cell_ids, resolution=resolution, indexing_scheme=indexing_scheme, ring=1
)

Expand All @@ -56,7 +56,7 @@ def test_neighbours_ring1_manual(resolution, indexing_scheme, dask):
def test_neighbours_ring1(resolution, indexing_scheme):
cell_ids = np.arange(12 * 4**resolution)

actual = nb.neighbours(
actual = neighbours(
cell_ids, resolution=resolution, indexing_scheme=indexing_scheme, ring=1
)
nside = 2**resolution
Expand Down

0 comments on commit 79639a7

Please sign in to comment.