Skip to content

Commit

Permalink
[docs] misc docsite adjustments (#1020)
Browse files Browse the repository at this point in the history
  • Loading branch information
ebezzi authored Feb 23, 2024
1 parent 38fff2d commit abbce9f
Show file tree
Hide file tree
Showing 8 changed files with 226 additions and 455 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def get_anndata(
X_name:
The ``X`` layer to query. Defaults to ``"raw"``.
X_layers:
Additional layers to add to :attr:``anndata.AnnData.layers``.
Additional layers to add to :attr:`anndata.AnnData.layers`.
obs_value_filter:
Value filter for the ``obs`` metadata. Value is a filter query written in the
SOMA ``value_filter`` syntax.
Expand Down
10 changes: 5 additions & 5 deletions api/python/cellxgene_census/src/cellxgene_census/_open.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ def _open_soma(


def get_default_soma_context(tiledb_config: Optional[Dict[str, Any]] = None) -> soma.options.SOMATileDBContext:
"""Return a :class:``tiledbsoma.SOMATileDBContext` with sensible defaults that can be further customized by the
user. The customized context can then be passed to :func:``cellxgene_census.open_soma`` with the ``context``
argument or to :meth:`somacore.SOMAObject.open`` with the ``context`` argument, such as
"""Return a :class:`tiledbsoma.SOMATileDBContext` with sensible defaults that can be further customized by the
user. The customized context can then be passed to :func:`cellxgene_census.open_soma` with the ``context``
argument or to :meth:`somacore.SOMAObject.open` with the ``context`` argument, such as
:meth:`tiledbsoma.Experiment.open`. Use the :meth:`tiledbsoma.SOMATileDBContext.replace` method on the returned
object to customize its settings further.
Expand All @@ -92,7 +92,7 @@ def get_default_soma_context(tiledb_config: Optional[Dict[str, Any]] = None) ->
defaults. If not specified, the default configuration will be returned.
Returns:
A :class:``tiledbsoma.SOMATileDBContext` object with sensible defaults.
A :class:`tiledbsoma.SOMATileDBContext` object with sensible defaults.
Examples:
To reduce the amount of memory used by TileDB-SOMA I/O operations:
Expand Down Expand Up @@ -144,7 +144,7 @@ def open_soma(
tiledb_config:
A dictionary of TileDB configuration parameters that will be used to open the SOMA object. Optional,
defaults to ``None``. If specified, the parameters will override the default settings specified by
``get_default_soma_context().tiledb_config``. Only one of the ``tiledb_config and ``context`` params
``get_default_soma_context().tiledb_config``. Only one of the ``tiledb_config`` and ``context`` params
can be specified.
context:
A custom :class:`tiledbsoma.SOMATileDBContext` that will be used to open the SOMA object.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def get_embedding_metadata(embedding_uri: str, context: soma.options.SOMATileDBC
Args:
embedding_uri:
The embedding URI
The embedding URI.
context:
A custom :class:`tiledbsoma.SOMATileDBContext` which will be used to open the SOMA object. Optional,
defaults to ``None``.
Expand Down Expand Up @@ -82,7 +82,7 @@ def get_embedding(
Examples:
>>> obs_somaids_to_fetch = np.array([10,11], dtype=np.int64)
>>> emb = cellxgene_census.experimental.get_embedding('2023-10-23', embedding_uri, obs_somaids_to_fetch)
>>> emb = cellxgene_census.experimental.get_embedding('2023-12-15', embedding_uri, obs_somaids_to_fetch)
>>> emb.shape
(2, 200)
>>> emb[:, 0:4]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ def _read_partial_torch_batch(self, batch_size: int) -> ObsAndXDatum:


class ExperimentDataPipe(pipes.IterDataPipe[Dataset[ObsAndXDatum]]): # type: ignore
"""An :class:`torchdata.datapipes.iter.IterDataPipe` that reads ``obs`` and ``X`` data from a
r"""An :class:`torchdata.datapipes.iter.IterDataPipe` that reads ``obs`` and ``X`` data from a
:class:`tiledbsoma.Experiment`, based upon the specified queries along the ``obs`` and ``var`` axes. Provides an
iterator over these data when the object is passed to Python's built-in ``iter`` function.
Expand All @@ -351,10 +351,10 @@ class ExperimentDataPipe(pipes.IterDataPipe[Dataset[ObsAndXDatum]]): # type: ig
[2417, 0, 3]], dtype=torch.int64))
The ``return_sparse_X`` parameter controls whether the ``X`` data is returned as a dense or sparse
:class:`torch.Tensor`. If the model supports use of sparse :class:`torch.Tensor`s, this will reduce memory usage.
:class:`torch.Tensor`. If the model supports use of sparse :class:`torch.Tensor`\ s, this will reduce memory usage.
The ``obs_column_names`` parameter determines the data columns that are returned in the ``obs`` Tensor. The first
element is always the ``soma_joinid`` of the ``obs`` :class:`pandas.DataFrame` (or, equiavalently, the
element is always the ``soma_joinid`` of the ``obs`` :class:`pandas.DataFrame` (or, equivalently, the
``soma_dim_0`` of the ``X`` matrix). The remaining elements are the ``obs`` columns specified by
``obs_column_names``, and string-typed columns are encoded as integer values. If needed, these values can be decoded
by obtaining the encoder for a given ``obs`` column name and calling its ``inverse_transform`` method:
Expand Down Expand Up @@ -394,7 +394,7 @@ def __init__(
soma_chunk_size: Optional[int] = None,
use_eager_fetch: bool = True,
) -> None:
"""Construct a new ``ExperimentDataPipe``.
r"""Construct a new ``ExperimentDataPipe``.
Args:
experiment:
Expand All @@ -415,7 +415,7 @@ def __init__(
batch_size:
The number of rows of ``obs`` and ``X`` data to return in each iteration. Defaults to ``1``. A value of
``1`` will result in :class:`torch.Tensor` of rank 1 being returned (a single row); larger values will
result in :class:`torch.Tensor`s of rank 2 (multiple rows).
result in :class:`torch.Tensor`\ s of rank 2 (multiple rows).
shuffle:
Whether to shuffle the ``obs`` and ``X`` data being returned. Defaults to ``False`` (no shuffling).
For performance reasons, shuffling is performed in two steps: 1) a global shuffling, where contiguous
Expand All @@ -436,7 +436,7 @@ def __init__(
return_sparse_X:
Controls whether the ``X`` data is returned as a dense or sparse :class:`torch.Tensor`. As ``X`` data is
very sparse, setting this to ``True`` will reduce memory usage, if the model supports use of sparse
:class:`torch.Tensor`s. Defaults to ``False``, since sparse :class:`torch.Tensor`s are still
:class:`torch.Tensor`\ s. Defaults to ``False``, since sparse :class:`torch.Tensor`\ s are still
experimental in PyTorch.
soma_chunk_size:
The number of ``obs``/``X`` rows to retrieve when reading data from SOMA. This impacts two aspects of
Expand Down Expand Up @@ -624,11 +624,11 @@ def _build_obs_encoders(self, query: soma.ExperimentAxisQuery) -> Encoders:

# TODO: This does not work in multiprocessing mode, as child processes' stats are not collected
def stats(self) -> Stats:
"""Get data loading stats for this :class:`cellxgene_census.ml.pytorch.ExperimentDataPipe`.
"""Get data loading stats for this :class:`cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe`.
Returns:
The :class:`cellxgene_census.ml.pytorch.Stats` object for this
:class:`cellxgene_census.ml.pytorch.ExperimentDataPipe`.
The :class:`cellxgene_census.experimental.ml.pytorch.Stats` object for this
:class:`cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe`.
Lifecycle:
experimental
Expand All @@ -637,7 +637,7 @@ def stats(self) -> Stats:

@property
def shape(self) -> Tuple[int, int]:
"""Get the shape of the data that will be returned by this :class:`cellxgene_census.ml.pytorch.ExperimentDataPipe`.
"""Get the shape of the data that will be returned by this :class:`cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe`.
This is the number of obs (cell) and var (feature) counts in the returned data. If used in multiprocessing mode
(i.e. :class:`torch.utils.data.DataLoader` instantiated with num_workers > 0), the obs (cell) count will reflect
the size of the partition of the data assigned to the active process.
Expand Down Expand Up @@ -684,23 +684,23 @@ def experiment_dataloader(
**dataloader_kwargs: Any,
) -> DataLoader:
"""Factory method for :class:`torch.utils.data.DataLoader`. This method can be used to safely instantiate a
:class:`torch.utils.data.DataLoader` that works with :class:`cellxgene_census.ml.pytorch.ExperimentDataPipe`,
:class:`torch.utils.data.DataLoader` that works with :class:`cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe`,
since some of the :class:`torch.utils.data.DataLoader` constructor parameters are not applicable when using a
:class:`torchdata.datapipes.iter.IterDataPipe` (``shuffle``, ``batch_size``, ``sampler``, ``batch_sampler``,
``collate_fn``).
Args:
datapipe:
An :class:`torchdata.datapipes.iter.IterDataPipe`, which can be an
:class:`cellxgene_census.ml.pytorch.ExperimentDataPipe` or any other
:class:`cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe` or any other
:class:`torchdata.datapipes.iter.IterDataPipe` that has been chained to the
:class:`cellxgene_census.ml.pytorch.ExperimentDataPipe`.
:class:`cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe`.
num_workers:
Number of worker processes to use for data loading. If ``0``, data will be loaded in the main process.
**dataloader_kwargs:
Additional keyword arguments to pass to the :class:`torch.utils.data.DataLoader` constructor,
except for ``shuffle``, ``batch_size``, ``sampler``, ``batch_sampler``, and ``collate_fn``, which are not
supported when using :class:`cellxgene_census.ml.pytorch.ExperimentDataPipe`.
supported when using :class:`cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe`.
Returns:
A :class:`torch.utils.data.DataLoader`.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ def get_highly_variable_genes(
batch_key_func:
Optional function to create a user-defined batch key. Function will be called once per row in the obs
dataframe. Function will receive a single argument: a :class:`pandas.Series` containing values specified in
the``batch_key`` argument.
the ``batch_key`` argument.
Returns:
:class:`pandas.DataFrame` containing annotations for all ``var`` values specified by the query.
Expand All @@ -383,7 +383,7 @@ def get_highly_variable_genes(
Examples:
Fetch a :class:`pandas.DataFrame` containing var annotations for a subset of the cells matching the
``obs_value_filter`:
``obs_value_filter``:
>>> hvg = get_highly_variable_genes(
census,
Expand All @@ -397,15 +397,13 @@ def get_highly_variable_genes(
>>> with cellxgene_census.open_soma(census_version="stable") as census:
organism = "mus_musculus"
obs_value_filter = "is_primary_data == True and tissue_general == 'lung'"
# Get the highly variable genes
hvg = cellxgene_census.experimental.pp.get_highly_variable_genes(
census,
organism=organism,
obs_value_filter=obs_value_filter,
n_top_genes = 500
)
# Fetch AnnData - all cells matching obs_value_filter, just the HVGs
hvg_soma_ids = hvg[hvg.highly_variable].index.values
adata = cellxgene_census.get_anndata(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def mean_variance(
Args:
query:
An :class:`tiledbsoma.ExperimentAxisQuery`, specifying the ``obs``/``var`` selection over which mean and
A :class:`tiledbsoma.ExperimentAxisQuery`, specifying the ``obs``/``var`` selection over which mean and
variance are calculated.
layer:
X layer used, e.g., ``"raw"``.
Expand Down
Loading

0 comments on commit abbce9f

Please sign in to comment.