Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DensMAP support #2946

Open
wants to merge 43 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
4bd98b6
Add docstrings
keller-mark Mar 22, 2024
b55fa95
More tests
keller-mark Mar 22, 2024
e1a0797
Fixes for tests
keller-mark Mar 22, 2024
f140bda
Typo
keller-mark Mar 22, 2024
745e79e
Linting
keller-mark Mar 22, 2024
c63e745
Ruff
keller-mark Mar 22, 2024
66cf7fd
More linting
keller-mark Mar 22, 2024
d705887
More linting
keller-mark Mar 22, 2024
af32d85
Merge branch 'main' into keller-mark/densmap-2
keller-mark May 16, 2024
ec2b6d9
Update references.bib
keller-mark May 16, 2024
b41ddbd
Update references.bib
keller-mark May 16, 2024
86a0df0
Merge
keller-mark Aug 8, 2024
335d5a9
Merge branch 'keller-mark/densmap-2' of github.com:keller-mark/scanpy…
keller-mark Aug 8, 2024
6ee89e7
Fix bug
keller-mark Aug 8, 2024
603afb3
Update
keller-mark Aug 8, 2024
a2bbcf8
Merge branch 'main' of github.com:keller-mark/scanpy into keller-mark…
keller-mark Dec 10, 2024
4d15620
Release note
keller-mark Dec 10, 2024
e8350d2
Formatting
keller-mark Dec 10, 2024
9030cb4
Try pre-commit
keller-mark Dec 10, 2024
967da92
Fix random_State type
keller-mark Dec 10, 2024
b70ec7e
Refactor type
keller-mark Dec 11, 2024
1784fa4
Fix citation format
keller-mark Dec 11, 2024
e92785e
Fix bibtex key
keller-mark Dec 11, 2024
2ce47d6
Use partial to implement sc.tl.densmap
keller-mark Dec 17, 2024
5838162
Update params in test
keller-mark Dec 17, 2024
ba2e887
Revert convenience sc.tl.densmap
keller-mark Dec 17, 2024
dffadfe
Add image plotting test
keller-mark Dec 17, 2024
44cfafc
Fix variable naming in test
keller-mark Dec 17, 2024
0507434
Merge branch 'main' of github.com:scverse/scanpy into keller-mark/den…
keller-mark Jan 3, 2025
e4b81f5
Frameon
keller-mark Jan 4, 2025
0ec47b3
Revert frameon change
keller-mark Jan 5, 2025
4f3ae06
Merge branch 'main' into keller-mark/densmap-2
keller-mark Jan 11, 2025
faaf5ae
Merge branch 'main' of github.com:scverse/scanpy into keller-mark/den…
keller-mark Jan 14, 2025
4152565
Change type name
keller-mark Jan 16, 2025
3d73310
Merge
keller-mark Jan 16, 2025
8f8787a
Check if specific to densmap
keller-mark Jan 16, 2025
3b68fe2
Add expected image
keller-mark Jan 16, 2025
8d45d49
Skipif for numba
keller-mark Jan 22, 2025
3cc91fc
Use pkg_version
keller-mark Jan 22, 2025
e8e00e6
Update .azure-pipelines.yml
ilan-gold Jan 23, 2025
5df86ac
Update .azure-pipelines.yml
ilan-gold Jan 23, 2025
5c2be1e
Update .azure-pipelines.yml
ilan-gold Jan 23, 2025
1e4ac26
Update .azure-pipelines.yml
ilan-gold Jan 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,11 @@ jobs:
artifactName: debug-data
condition: eq(variables['TEST_TYPE'], 'coverage')

- task: PublishBuildArtifacts@1
inputs:
pathToPublish: '$(Pipeline.Workspace)/s/tests/_images'
artifactName: '$(DEPENDENCIES_VERSION)-$(python.version)-images'

- script: bash <(curl -s https://codecov.io/bash)
displayName: 'Upload to codecov.io'
condition: eq(variables['TEST_TYPE'], 'coverage')
Expand Down
1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ def setup(app: Sphinx):
("py:class", "scanpy._utils.Empty"),
("py:class", "numpy.random.mtrand.RandomState"),
("py:class", "scanpy.neighbors._types.KnnTransformerLike"),
("py:class", "scanpy.tools._types.DensmapMethodKwds"),
]

# Options for plot examples
Expand Down
14 changes: 14 additions & 0 deletions docs/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,20 @@ @article{Muraro2016
pages = {385--394.e3},
}

@article{Narayan2021,
author = {Narayan, Ashwin and Berger, Bonnie and Cho, Hyunghoon},
title = {Assessing single-cell transcriptomic variability through density-preserving data visualization},
volume = {39},
url = {https://doi.org/10.1038/s41587-020-00801-7},
doi = {10.1038/s41587-020-00801-7},
number = {6},
journal = {Nature Biotechnology},
publisher = {Springer Science and Business Media LLC},
year = {2021},
month = {jan},
pages = {765--774},
}

@article{Nowotschin2019,
author = {Nowotschin, Sonja and Setty, Manu and Kuo, Ying-Yi and Liu, Vincent and Garg, Vidur and Sharma, Roshan and Simon, Claire S. and Saiz, Nestor and Gardner, Rui and Boutet, Stéphane C. and Church, Deanna M. and Hoodless, Pamela A. and Hadjantonakis, Anna-Katerina and Pe’er, Dana},
title = {The emergent landscape of the mouse gut endoderm at single-cell resolution},
Expand Down
1 change: 1 addition & 0 deletions docs/release-notes/2946.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add DensMAP support via `method="densmap"` in {func}`~scanpy.tl.umap` {smaller}`M Keller`
2 changes: 2 additions & 0 deletions src/scanpy/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
if TYPE_CHECKING:
from typing import Any

from ._types import DensmapMethodKwds # noqa: F401


def __getattr__(name: str) -> Any:
if name == "pca":
Expand Down
9 changes: 9 additions & 0 deletions src/scanpy/tools/_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from __future__ import annotations

from typing import TypedDict


class DensmapMethodKwds(TypedDict, total=False):
    """Optional densMAP tuning parameters accepted via ``method_kwds``.

    Every key may be omitted (``total=False``); the caller falls back to
    the defaults noted below when a key is missing.
    """

    # Regularization weight of the density correlation term (default 2.0).
    dens_lambda: float
    # Fraction of epochs, between 0 and 1, that use the density-augmented
    # objective (default 0.3).
    dens_frac: float
    # Small constant added to the variance of local radii to avoid dividing
    # by a small number (default 0.1).
    dens_var_shift: float
85 changes: 76 additions & 9 deletions src/scanpy/tools/_umap.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import warnings
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Literal

import numpy as np
from sklearn.utils import check_array, check_random_state
Expand All @@ -18,6 +18,7 @@
from anndata import AnnData

from .._compat import _LegacyRandom
from ._types import DensmapMethodKwds

_InitPos = Literal["paga", "spectral", "random"]

Expand Down Expand Up @@ -52,7 +53,8 @@ def umap(
random_state: _LegacyRandom = 0,
a: float | None = None,
b: float | None = None,
method: Literal["umap", "rapids"] = "umap",
method: Literal["umap", "rapids", "densmap"] = "umap",
method_kwds: DensmapMethodKwds | None = None,
key_added: str | None = None,
neighbors_key: str = "neighbors",
copy: bool = False,
Expand Down Expand Up @@ -128,6 +130,8 @@ def umap(

``'umap'``
Umap’s simplicial set embedding.
``'densmap'``
Umap’s simplicial set embedding with densmap=True :cite:p:`Narayan2021`.
``'rapids'``
GPU accelerated implementation.

Expand All @@ -147,19 +151,51 @@ def umap(
copy
Return a copy instead of writing to adata.

method_kwds
Additional method parameters.

If method is ``'densmap'``, the following parameters are available:

``dens_lambda`` : `float`, optional (default: 2.0)
Controls the regularization weight of the density correlation term
in densMAP. Higher values prioritize density preservation over the
UMAP objective, and vice versa for values closer to zero. Setting this
parameter to zero is equivalent to running the original UMAP algorithm.
``dens_frac`` : `float`, optional (default: 0.3)
Controls the fraction of epochs (between 0 and 1) where the
density-augmented objective is used in densMAP. The first
(1 - dens_frac) fraction of epochs optimize the original UMAP objective
before introducing the density correlation term.
``dens_var_shift`` : `float`, optional (default: 0.1)
A small constant added to the variance of local radii in the
embedding when calculating the density correlation objective to
prevent numerical instability from dividing by a small number.

Returns
-------
Returns `None` if `copy=False`, else returns an `AnnData` object. Sets the following fields:
Returns `None` if `copy=False`, else returns an `AnnData` object. Sets the following fields unless method is 'densmap':

`adata.obsm['X_umap' | key_added]` : :class:`numpy.ndarray` (dtype `float`)
UMAP coordinates of data.
`adata.uns['umap' | key_added]` : :class:`dict`
UMAP parameters.

When method is 'densmap', sets the following fields:

`adata.obsm['X_densmap' | key_added]` : :class:`numpy.ndarray` (dtype `float`)
densMAP coordinates of data.
`adata.uns['densmap' | key_added]` : :class:`dict`
densMAP parameters.

"""
adata = adata.copy() if copy else adata

key_obsm, key_uns = ("X_umap", "umap") if key_added is None else [key_added] * 2
key_obsm, key_uns = (
(("X_densmap", "densmap") if method == "densmap" else ("X_umap", "umap"))
if key_added is None
else [key_added] * 2
)
method_name = "DensMAP" if method == "densmap" else "UMAP"

if neighbors_key is None: # backwards compat
neighbors_key = "neighbors"
Expand All @@ -185,6 +221,7 @@ def umap(

if a is None or b is None:
a, b = find_ab_params(spread, min_dist)

adata.uns[key_uns] = dict(params=dict(a=a, b=b))
if isinstance(init_pos, str) and init_pos in adata.obsm:
init_coords = adata.obsm[init_pos]
Expand All @@ -208,11 +245,40 @@ def umap(
n_pcs=neigh_params.get("n_pcs", None),
silent=True,
)
if method == "umap":

if method_kwds is None:
method_kwds = {}

densmap_kwds = (
{
"graph_dists": neighbors["distances"],
"n_neighbors": neigh_params.get("n_neighbors", 15),
# Default params from umap package
# Reference: https://github.com/lmcinnes/umap/blob/868e55cb614f361a0d31540c1f4a4b175136025c/umap/umap_.py#L1692
# If user provided method_kwds, the user-provided values should
# overwrite the default values specified above.
"lambda": method_kwds.get("dens_lambda", 2.0),
"frac": method_kwds.get("dens_frac", 0.3),
"var_shift": method_kwds.get("dens_var_shift", 0.1),
}
if method == "densmap"
else {}
)
if method == "densmap":
adata.uns[key_uns]["params"].update(
{
"dens_lambda": densmap_kwds["lambda"],
"dens_frac": densmap_kwds["frac"],
"dens_var_shift": densmap_kwds["var_shift"],
}
)

if method == "umap" or method == "densmap":
# the data matrix X is really only used for determining the number of connected components
# for the init condition in the UMAP embedding
default_epochs = 500 if neighbors["connectivities"].shape[0] <= 10000 else 200
n_epochs = default_epochs if maxiter is None else maxiter

X_umap, _ = simplicial_set_embedding(
data=X,
graph=neighbors["connectivities"].tocoo(),
Expand All @@ -227,8 +293,8 @@ def umap(
random_state=random_state,
metric=neigh_params.get("metric", "euclidean"),
metric_kwds=neigh_params.get("metric_kwds", {}),
densmap=False,
densmap_kwds={},
densmap=(method == "densmap"),
densmap_kwds=densmap_kwds,
output_dens=False,
verbose=settings.verbosity > 3,
)
Expand Down Expand Up @@ -267,14 +333,15 @@ def umap(
random_state=random_state,
)
X_umap = umap.fit_transform(X_contiguous)

adata.obsm[key_obsm] = X_umap # annotate samples with UMAP coordinates
logg.info(
" finished",
time=start,
deep=(
"added\n"
f" {key_obsm!r}, UMAP coordinates (adata.obsm)\n"
f" {key_uns!r}, UMAP parameters (adata.uns)"
f" {key_obsm!r}, {method_name} coordinates (adata.obsm)\n"
f" {key_uns!r}, {method_name} parameters (adata.uns)"
),
)
return adata if copy else None
Binary file added tests/_images/another_umap/expected.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/_images/densmap_nocolor/expected.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
21 changes: 21 additions & 0 deletions tests/test_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,24 @@ def test_diffmap():
sc.tl.diffmap(pbmc, random_state=1234)
d3 = pbmc.obsm["X_diffmap"].copy()
assert_raises(AssertionError, assert_array_equal, d1, d3)


def test_densmap():
    """Check densMAP reproducibility and sensitivity to seed and ``dens_lambda``."""
    adata = pbmc68k_reduced()

    def embed(**kwargs):
        # Run densMAP on the shared AnnData and snapshot the coordinates,
        # since the next run overwrites ``X_densmap`` in place.
        sc.tl.umap(adata, method="densmap", **kwargs)
        return adata.obsm["X_densmap"].copy()

    # Two runs with identical (default) settings must agree exactly.
    baseline = embed()
    repeated = embed()
    assert_array_equal(baseline, repeated)

    # A different random_state must produce a different embedding.
    reseeded = embed(random_state=1234)
    assert_raises(AssertionError, assert_array_equal, baseline, reseeded)

    # A non-default dens_lambda must also produce a different embedding.
    reweighted = embed(method_kwds=dict(dens_lambda=2.3456))
    assert_raises(AssertionError, assert_array_equal, baseline, reweighted)
34 changes: 33 additions & 1 deletion tests/test_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -1018,9 +1018,11 @@ def pbmc_scatterplots_session() -> AnnData:
pbmc.layers["sparse"] = pbmc.raw.X / 2
pbmc.layers["test"] = pbmc.X.copy() + 100
pbmc.var["numbers"] = [str(x) for x in range(pbmc.shape[1])]
sc.pp.neighbors(pbmc)
sc.pp.neighbors(pbmc, random_state=np.random.RandomState(1))
sc.tl.tsne(pbmc, random_state=0, n_pcs=30)
sc.tl.diffmap(pbmc)
sc.tl.umap(pbmc, key_added="X_another_umap", random_state=np.random.RandomState(1))
sc.tl.umap(pbmc, method="densmap", random_state=np.random.RandomState(1))
return pbmc


Expand Down Expand Up @@ -1180,6 +1182,36 @@ def test_scatterplots(image_comparer, pbmc_scatterplots, id, fn):
save_and_compare_images(id)


@pytest.mark.skipif(
    pkg_version("numba") < Version("0.61.0"),
    reason="Same random_state value produces different UMAP results between numba versions. See #2946",
)
@pytest.mark.parametrize(
    ("id", "fn"),
    [
        ("another_umap", partial(sc.pl.embedding, basis="X_another_umap")),
        ("densmap_nocolor", partial(sc.pl.embedding, basis="X_densmap")),
    ],
)
def test_umap_scatterplots(image_comparer, pbmc_scatterplots, id, fn):
    """Plot the UMAP and densMAP embeddings and compare them to expected images.

    ``id`` is the image identifier handed to the comparer and ``fn`` is the
    pre-configured plotting call for the embedding under test.
    """
    # Draw without displaying so the comparer can pick up the figure.
    fn(pbmc_scatterplots, show=False)
    # Same call the original built through ``partial``: root, then id, tol=15.
    image_comparer(ROOT, id, tol=15)


def test_scatter_embedding_groups_and_size(image_comparer):
# test that the 'groups' parameter sorts
# cells, such that the cells belonging to the groups are
Expand Down
Loading