Skip to content

Commit

Permalink
Merge branch 'main' into ig/cache_indptr
Browse files Browse the repository at this point in the history
  • Loading branch information
ilan-gold authored Dec 18, 2023
2 parents 25ad928 + fbd2d84 commit eed21ab
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 19 deletions.
16 changes: 13 additions & 3 deletions anndata/_core/sparse_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from abc import ABC
from functools import cached_property
from itertools import accumulate, chain
from pathlib import Path
from typing import TYPE_CHECKING, Literal, NamedTuple

import h5py
Expand All @@ -25,7 +26,7 @@
from scipy.sparse import _sparsetools

from anndata._core.index import _fix_slice_bounds
from anndata.compat import H5Group, ZarrGroup
from anndata.compat import H5Group, ZarrArray, ZarrGroup

from ..compat import _read_attr

Expand Down Expand Up @@ -58,8 +59,17 @@ class BackedSparseMatrix(_cs_matrix):
def copy(self) -> ss.spmatrix:
    """Return an in-memory copy of this backed sparse matrix.

    The strategy depends on what kind of array ``self.data`` is:

    * ``h5py.Dataset``: the dataset's parent group is wrapped with
      ``sparse_dataset`` and loaded via ``to_memory()``.
    * ``ZarrArray``: the array's store is re-opened read-only, the group
      at the parent of the array's path is looked up, wrapped with
      ``sparse_dataset`` and loaded via ``to_memory()``.
    * anything else: defer to the parent class's ``copy()``.
    """
    if isinstance(self.data, h5py.Dataset):
        return sparse_dataset(self.data.parent).to_memory()
    if isinstance(self.data, ZarrArray):
        # Imported lazily so zarr is only required when a zarr array is used.
        import zarr

        root = zarr.open(
            store=self.data.store,
            mode="r",
            chunk_store=self.data.chunk_store,  # chunk_store is needed, not clear why
        )
        # The sparse matrix group is the parent of the ``data`` array's path.
        parent_group = root[Path(self.data.path).parent]
        return sparse_dataset(parent_group).to_memory()
    # Single fallback for every non-backed case (no duplicated else branch).
    return super().copy()

def _set_many(self, i: Iterable[int], j: Iterable[int], x):
"""\
Expand Down
20 changes: 4 additions & 16 deletions anndata/tests/test_backed_sparse.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

from contextlib import contextmanager
from typing import TYPE_CHECKING, Callable, Literal

import h5py
Expand Down Expand Up @@ -64,7 +63,7 @@ def callback(func, elem_name, elem, iospec):
**{k: read_dispatched(v, callback) for k, v in elem.items()}
)
if iospec.encoding_type in {"csc_matrix", "csr_matrix"}:
return sparse_dataset(elem)._to_backed()
return sparse_dataset(elem)
return func(elem)

adata = read_dispatched(f, callback=callback)
Expand All @@ -90,6 +89,7 @@ def test_backed_indexing(

assert_equal(csr_mem[obs_idx, var_idx].X, csr_disk[obs_idx, var_idx].X)
assert_equal(csr_mem[obs_idx, var_idx].X, csc_disk[obs_idx, var_idx].X)
assert_equal(csr_mem.X[...], csc_disk.X[...])
assert_equal(csr_mem[obs_idx, :].X, dense_disk[obs_idx, :].X)
assert_equal(csr_mem[obs_idx].X, csr_disk[obs_idx].X)
assert_equal(csr_mem[:, var_idx].X, dense_disk[:, var_idx].X)
Expand Down Expand Up @@ -288,17 +288,6 @@ def test_anndata_sparse_compat(tmp_path: Path, diskfmt: Literal["h5ad", "zarr"])
assert_equal(adata.X, base)


@contextmanager
def xfail_if_zarr(diskfmt: Literal["h5ad", "zarr"]):
    """Context manager that expects the wrapped body to fail for zarr.

    For any format other than ``"zarr"`` the body simply runs. For
    ``"zarr"`` the body must raise ``AssertionError``; afterwards the test
    is marked as an expected failure through ``pytest.xfail``.
    """
    if diskfmt != "zarr":
        yield
        return

    with pytest.raises(AssertionError):
        yield
    # TODO: Zarr backed mode https://github.com/scverse/anndata/issues/219
    pytest.xfail("Backed zarr not really supported yet")


def test_backed_sizeof(
ondisk_equivalent_adata: tuple[AnnData, AnnData, AnnData, AnnData],
diskfmt: Literal["h5ad", "zarr"],
Expand All @@ -308,6 +297,5 @@ def test_backed_sizeof(
assert csr_mem.__sizeof__() == csr_disk.__sizeof__(with_disk=True)
assert csr_mem.__sizeof__() == csc_disk.__sizeof__(with_disk=True)
assert csr_disk.__sizeof__(with_disk=True) == csc_disk.__sizeof__(with_disk=True)
with xfail_if_zarr(diskfmt):
assert csr_mem.__sizeof__() > csr_disk.__sizeof__()
assert csr_mem.__sizeof__() > csc_disk.__sizeof__()
assert csr_mem.__sizeof__() > csr_disk.__sizeof__()
assert csr_mem.__sizeof__() > csc_disk.__sizeof__()
1 change: 1 addition & 0 deletions docs/release-notes/0.10.4.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
* `adata[:, []]` now returns an `AnnData` object that is empty along the appropriate dimensions instead of raising an error {pr}`1243` {user}`ilan-gold`
* `adata.X[mask]` works in newer `numpy` versions when `X` is `backed` {pr}`1255` {user}`ilan-gold`
* `BaseCompressedSparseDataset`'s `indptr` is cached {pr}`1266` {user}`ilan-gold`
* `adata.X[...]` now works when `X` is a `BaseCompressedSparseDataset` with a `zarr` backend {pr}`1265` {user}`ilan-gold`

```{rubric} Documentation
```
Expand Down

0 comments on commit eed21ab

Please sign in to comment.