diff --git a/ci/requirements/environment.yaml b/ci/requirements/environment.yaml index 4f35de6..529ce1a 100644 --- a/ci/requirements/environment.yaml +++ b/ci/requirements/environment.yaml @@ -13,6 +13,7 @@ dependencies: - healpy - geopandas - pandas + - sparse - xarray - folium - shapely diff --git a/healpix_convolution/__init__.py b/healpix_convolution/__init__.py index 0ebbbd1..cbfdd11 100644 --- a/healpix_convolution/__init__.py +++ b/healpix_convolution/__init__.py @@ -1,5 +1,9 @@ from importlib.metadata import version +from healpix_convolution import kernels # noqa: F401 +from healpix_convolution.distances import angular_distances # noqa: F401 +from healpix_convolution.neighbours import neighbours # noqa: F401 + try: __version__ = version("healpix_convolution") except Exception: diff --git a/healpix_convolution/distances.py b/healpix_convolution/distances.py index 05e5b4f..13a3946 100644 --- a/healpix_convolution/distances.py +++ b/healpix_convolution/distances.py @@ -30,7 +30,7 @@ def _distances(a, b, axis, nside, nest): return np.arctan2(cross_product, dot_product) -def distances(neighbours, *, resolution, indexing_scheme="nested", axis=None): +def angular_distances(neighbours, *, resolution, indexing_scheme="nested", axis=None): """compute the angular great-circle distances between neighbours Parameters diff --git a/healpix_convolution/kernels/__init__.py b/healpix_convolution/kernels/__init__.py new file mode 100644 index 0000000..e631c98 --- /dev/null +++ b/healpix_convolution/kernels/__init__.py @@ -0,0 +1 @@ +from healpix_convolution.kernels.gaussian import gaussian_kernel # noqa: F401 diff --git a/healpix_convolution/kernels/common.py b/healpix_convolution/kernels/common.py new file mode 100644 index 0000000..fceb2aa --- /dev/null +++ b/healpix_convolution/kernels/common.py @@ -0,0 +1,19 @@ +import numpy as np +import sparse + + +def create_sparse(cell_ids, neighbours, weights, shape): + neighbours_ = np.reshape(neighbours, -1) + mask = neighbours_ != -1 + + n_neighbours = neighbours.shape[-1] + cell_ids_ = np.reshape( + np.repeat(cell_ids[..., None], repeats=n_neighbours, axis=-1), -1 + ) + + coords = np.reshape(np.stack([cell_ids_, neighbours_], axis=0), (2, -1)) + + weights_ = np.reshape(weights, -1)[mask] + coords_ = coords[..., mask] + + return sparse.COO(coords=coords_, data=weights_, shape=shape, fill_value=0) diff --git a/healpix_convolution/kernels/gaussian.py b/healpix_convolution/kernels/gaussian.py new file mode 100644 index 0000000..85d5d9f --- /dev/null +++ b/healpix_convolution/kernels/gaussian.py @@ -0,0 +1,71 @@ +import healpy as hp +import numpy as np + +from healpix_convolution.distances import angular_distances +from healpix_convolution.kernels.common import create_sparse +from healpix_convolution.neighbours import neighbours + + +def gaussian_kernel( + cell_ids, + *, + resolution: int, + indexing_scheme: str, + sigma: float, + truncate: float = 4.0, + kernel_size: int | None = None, +): + """construct a gaussian kernel on the healpix grid + + Parameters + ---------- + cell_ids : array-like + The cell ids. + resolution : int + The healpix resolution + indexing_scheme : {"nested", "ring"} + The healpix indexing scheme + sigma : float + The standard deviation of the gaussian kernel + truncate : float, default: 4.0 + Truncate the kernel after this many multiples of ``sigma``. + kernel_size : int, optional + If given, determines the size of the kernel. In that case, ``truncate`` is ignored. + + Returns + ------- + kernel : sparse.COO + The constructed kernel + + Notes + ----- + no dask support, yet + """ + if cell_ids.ndim != 1 or len([s for s in cell_ids.shape if s != 1]) != 1: + raise ValueError( + f"cell ids must be 1-dimensional, but shape is: {cell_ids.shape}" + ) + + cell_ids = np.reshape(cell_ids, -1) + + # TODO: figure out whether there is a better way of defining the units of `sigma` + if kernel_size is not None: + ring = int(kernel_size / 2) + else: + cell_distance = hp.nside2resol(2**resolution, arcmin=False) + ring = int((truncate * sigma / cell_distance) // 2) + + nb = neighbours( + cell_ids, resolution=resolution, indexing_scheme=indexing_scheme, ring=ring + ) + d = angular_distances(nb, resolution=resolution, indexing_scheme=indexing_scheme) + + sigma2 = sigma * sigma + phi_x = np.exp(-0.5 / sigma2 * d**2) + masked = np.where(nb == -1, 0, phi_x) + normalized = masked / np.sum(masked, axis=1, keepdims=True) + + # TODO (keewis): figure out a way to translate global healpix indices to local ones + # The kernel should still work for a subset of the full map. + shape = (12 * 4**resolution, 12 * 4**resolution) + return create_sparse(cell_ids, nb, normalized, shape=shape) diff --git a/healpix_convolution/tests/test_distances.py b/healpix_convolution/tests/test_distances.py index eba939d..1688226 100644 --- a/healpix_convolution/tests/test_distances.py +++ b/healpix_convolution/tests/test_distances.py @@ -12,6 +12,6 @@ ), ) def test_distances_numpy(neighbours, expected): - actual = hds.distances(neighbours, resolution=2, indexing_scheme="nested") + actual = hds.angular_distances(neighbours, resolution=2, indexing_scheme="nested") np.testing.assert_allclose(actual, expected) diff --git a/healpix_convolution/tests/test_kernels.py b/healpix_convolution/tests/test_kernels.py new file mode 100644 index 0000000..bea4d91 --- /dev/null +++ b/healpix_convolution/tests/test_kernels.py @@ -0,0 +1,114 @@ +import numpy as np +import pytest + +from healpix_convolution import kernels + + +@pytest.mark.parametrize( + ["cell_ids", "neighbours", "weights"], + ( + pytest.param( + np.array([0, 1]), + np.array( + [[0, 17, 19, 2, 3, 1, 23, 22, 35], [3, 2, 13, 15, 11, 7, 6, 1, 0]] + ), + np.full((18,), fill_value=1 / 18), + ), + pytest.param( + np.array([3, 2]), + np.array( + [[3, 2, 13, 15, 11, 7, 6, 1, 0], [2, 19, -1, 13, 15, 3, 1, 0, 17]] + ), + np.full((18,), fill_value=1 / 17), + ), + ), +) +def test_create_sparse(cell_ids, neighbours, weights): + actual = kernels.common.create_sparse(cell_ids, neighbours, weights, shape=(48, 48)) + + nnz = np.sum(neighbours != -1, axis=1) + value = nnz * weights[0] + + assert hasattr(actual, "nnz"), "not a sparse matrix" + assert np.allclose( + np.sum(actual[cell_ids, :], axis=1).todense(), value + ), "rows have unexpected values" + assert actual.size == 48**2 + + +class TestGaussian: + @pytest.mark.parametrize( + ["cell_ids", "kwargs"], + ( + ( + np.array([1, 2]), + {"resolution": 1, "indexing_scheme": "nested", "sigma": 0.1}, + ), + ( + np.array([1, 2]), + {"resolution": 1, "indexing_scheme": "ring", "sigma": 0.1}, + ), + ( + np.array([0, 2]), + {"resolution": 1, "indexing_scheme": "nested", "sigma": 0.2}, + ), + ( + np.array([1, 2]), + { + "resolution": 1, + "indexing_scheme": "nested", + "sigma": 0.1, + "kernel_size": 5, + }, + ), + ( + np.array([3, 0]), + { + "resolution": 1, + "indexing_scheme": "ring", + "sigma": 0.1, + "kernel_size": 3, + }, + ), + ), + ) + def test_gaussian_kernel(self, cell_ids, kwargs): + actual = kernels.gaussian_kernel(cell_ids, **kwargs) + + kernel_sum = np.sum(actual, axis=1) + + assert np.sum(np.isnan(actual)) == 0 + np.testing.assert_allclose(kernel_sum[cell_ids].todense(), 1) + + # try determining the sigma from the values for better tests + + @pytest.mark.parametrize( + ["cell_ids", "kwargs", "error", "pattern"], + ( + ( + np.array([[0, 1], [2, 3]]), + { + "resolution": 1, + "indexing_scheme": "nested", + "sigma": 0.1, + "kernel_size": 3, + }, + ValueError, + "1-dimensional", + ), + ( + np.array([0, 1]), + { + "resolution": 1, + "indexing_scheme": "nested", + "sigma": 0.1, + "kernel_size": 7, + }, + ValueError, + "more than the neighbouring base pixels", + ), + ), + ) + def test_gaussian_kernel_errors(self, cell_ids, kwargs, error, pattern): + with pytest.raises(error, match=pattern): + kernels.gaussian_kernel(cell_ids, **kwargs) diff --git a/healpix_convolution/tests/test_neighbours.py b/healpix_convolution/tests/test_neighbours.py index 0c87149..12333aa 100644 --- a/healpix_convolution/tests/test_neighbours.py +++ b/healpix_convolution/tests/test_neighbours.py @@ -4,7 +4,7 @@ import pytest from hypothesis import given -import healpix_convolution.neighbours as nb +from healpix_convolution.neighbours import generate_offsets, neighbours try: import dask.array as da @@ -22,7 +22,7 @@ def test_generate_offsets(ring): kernel_size = 2 * ring + 1 kernel = np.zeros(shape=(kernel_size, kernel_size)) - for x, y in nb.generate_offsets(ring): + for x, y in generate_offsets(ring): kernel[x + ring, y + ring] = 1 assert np.sum(kernel) == kernel_size**2 @@ -39,7 +39,7 @@ def test_neighbours_ring1_manual(resolution, indexing_scheme, dask): cell_ids = xp.arange(12 * 4**resolution) - actual = nb.neighbours( + actual = neighbours( cell_ids, resolution=resolution, indexing_scheme=indexing_scheme, ring=1 ) @@ -56,7 +56,7 @@ def test_neighbours_ring1_manual(resolution, indexing_scheme, dask): def test_neighbours_ring1(resolution, indexing_scheme): cell_ids = np.arange(12 * 4**resolution) - actual = nb.neighbours( + actual = neighbours( cell_ids, resolution=resolution, indexing_scheme=indexing_scheme, ring=1 ) nside = 2**resolution