[docs] misc docsite adjustments (#1020)

chanzuckerberg · Feb 23, 2024 · abbce9f · abbce9f
1 parent 38fff2d
commit abbce9f
Show file tree

Hide file tree

Showing 8 changed files with 226 additions and 455 deletions.
diff --git a/api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py b/api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py
@@ -41,7 +41,7 @@ def get_anndata(
         X_name:
             The ``X`` layer to query. Defaults to ``"raw"``.
         X_layers:
-            Additional layers to add to :attr:``anndata.AnnData.layers``.
+            Additional layers to add to :attr:`anndata.AnnData.layers`.
         obs_value_filter:
             Value filter for the ``obs`` metadata. Value is a filter query written in the
             SOMA ``value_filter`` syntax.

diff --git a/api/python/cellxgene_census/src/cellxgene_census/_open.py b/api/python/cellxgene_census/src/cellxgene_census/_open.py
@@ -80,9 +80,9 @@ def _open_soma(
 
 
 def get_default_soma_context(tiledb_config: Optional[Dict[str, Any]] = None) -> soma.options.SOMATileDBContext:
-    """Return a :class:``tiledbsoma.SOMATileDBContext` with sensible defaults that can be further customized by the
-    user. The customized context can then be passed to :func:``cellxgene_census.open_soma`` with the ``context``
-    argument or to :meth:`somacore.SOMAObject.open`` with the ``context`` argument, such as
+    """Return a :class:`tiledbsoma.SOMATileDBContext` with sensible defaults that can be further customized by the
+    user. The customized context can then be passed to :func:`cellxgene_census.open_soma` with the ``context``
+    argument or to :meth:`somacore.SOMAObject.open` with the ``context`` argument, such as
     :meth:`tiledbsoma.Experiment.open`. Use the :meth:`tiledbsoma.SOMATileDBContext.replace` method on the returned
     object to customize its settings further.
 
@@ -92,7 +92,7 @@ def get_default_soma_context(tiledb_config: Optional[Dict[str, Any]] = None) ->
             defaults. If not specified, the default configuration will be returned.
 
     Returns:
-        A :class:``tiledbsoma.SOMATileDBContext` object with sensible defaults.
+        A :class:`tiledbsoma.SOMATileDBContext` object with sensible defaults.
 
     Examples:
         To reduce the amount of memory used by TileDB-SOMA I/O operations:
@@ -144,7 +144,7 @@ def open_soma(
         tiledb_config:
             A dictionary of TileDB configuration parameters that will be used to open the SOMA object. Optional,
             defaults to ``None``. If specified, the parameters will override the default settings specified by
-            ``get_default_soma_context().tiledb_config``. Only one of the ``tiledb_config and ``context`` params
+            ``get_default_soma_context().tiledb_config``. Only one of the ``tiledb_config`` and ``context`` params
             can be specified.
         context:
             A custom :class:`tiledbsoma.SOMATileDBContext` that will be used to open the SOMA object.

diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/_embedding.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/_embedding.py
@@ -24,7 +24,7 @@ def get_embedding_metadata(embedding_uri: str, context: soma.options.SOMATileDBC
 
     Args:
         embedding_uri:
-            The embedding URI
+            The embedding URI.
         context:
             A custom :class:`tiledbsoma.SOMATileDBContext` which will be used to open the SOMA object. Optional,
             defaults to ``None``.
@@ -82,7 +82,7 @@ def get_embedding(
 
     Examples:
         >>> obs_somaids_to_fetch = np.array([10,11], dtype=np.int64)
-        >>> emb = cellxgene_census.experimental.get_embedding('2023-10-23', embedding_uri, obs_somaids_to_fetch)
+        >>> emb = cellxgene_census.experimental.get_embedding('2023-12-15', embedding_uri, obs_somaids_to_fetch)
         >>> emb.shape
         (2, 200)
         >>> emb[:, 0:4]

diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py
@@ -327,7 +327,7 @@ def _read_partial_torch_batch(self, batch_size: int) -> ObsAndXDatum:
 
 
 class ExperimentDataPipe(pipes.IterDataPipe[Dataset[ObsAndXDatum]]):  # type: ignore
-    """An :class:`torchdata.datapipes.iter.IterDataPipe` that reads ``obs`` and ``X`` data from a
+    r"""An :class:`torchdata.datapipes.iter.IterDataPipe` that reads ``obs`` and ``X`` data from a
     :class:`tiledbsoma.Experiment`, based upon the specified queries along the ``obs`` and ``var`` axes. Provides an
     iterator over these data when the object is passed to Python's built-in ``iter`` function.
 
@@ -351,10 +351,10 @@ class ExperimentDataPipe(pipes.IterDataPipe[Dataset[ObsAndXDatum]]):  # type: ig
                  [2417,    0,    3]], dtype=torch.int64))
 
     The ``return_sparse_X`` parameter controls whether the ``X`` data is returned as a dense or sparse
-    :class:`torch.Tensor`. If the model supports use of sparse :class:`torch.Tensor`s, this will reduce memory usage.
+    :class:`torch.Tensor`. If the model supports use of sparse :class:`torch.Tensor`\ s, this will reduce memory usage.
 
     The ``obs_column_names`` parameter determines the data columns that are returned in the ``obs`` Tensor. The first
-    element is always the ``soma_joinid`` of the ``obs`` :class:`pandas.DataFrame` (or, equiavalently, the
+    element is always the ``soma_joinid`` of the ``obs`` :class:`pandas.DataFrame` (or, equivalently, the
     ``soma_dim_0`` of the ``X`` matrix). The remaining elements are the ``obs`` columns specified by
     ``obs_column_names``, and string-typed columns are encoded as integer values. If needed, these values can be decoded
     by obtaining the encoder for a given ``obs`` column name and calling its ``inverse_transform`` method:
@@ -394,7 +394,7 @@ def __init__(
         soma_chunk_size: Optional[int] = None,
         use_eager_fetch: bool = True,
     ) -> None:
-        """Construct a new ``ExperimentDataPipe``.
+        r"""Construct a new ``ExperimentDataPipe``.
 
         Args:
             experiment:
@@ -415,7 +415,7 @@ def __init__(
             batch_size:
                 The number of rows of ``obs`` and ``X`` data to return in each iteration. Defaults to ``1``. A value of
                 ``1`` will result in :class:`torch.Tensor` of rank 1 being returned (a single row); larger values will
-                result in :class:`torch.Tensor`s of rank 2 (multiple rows).
+                result in :class:`torch.Tensor`\ s of rank 2 (multiple rows).
             shuffle:
                 Whether to shuffle the ``obs`` and ``X`` data being returned. Defaults to ``False`` (no shuffling).
                 For performance reasons, shuffling is performed in two steps: 1) a global shuffling, where contiguous
@@ -436,7 +436,7 @@ def __init__(
             return_sparse_X:
                 Controls whether the ``X`` data is returned as a dense or sparse :class:`torch.Tensor`. As ``X`` data is
                 very sparse, setting this to ``True`` will reduce memory usage, if the model supports use of sparse
-                :class:`torch.Tensor`s. Defaults to ``False``, since sparse :class:`torch.Tensor`s are still
+                :class:`torch.Tensor`\ s. Defaults to ``False``, since sparse :class:`torch.Tensor`\ s are still
                 experimental in PyTorch.
             soma_chunk_size:
                 The number of ``obs``/``X`` rows to retrieve when reading data from SOMA. This impacts two aspects of
@@ -624,11 +624,11 @@ def _build_obs_encoders(self, query: soma.ExperimentAxisQuery) -> Encoders:
 
     # TODO: This does not work in multiprocessing mode, as child process's stats are not collected
     def stats(self) -> Stats:
-        """Get data loading stats for this :class:`cellxgene_census.ml.pytorch.ExperimentDataPipe`.
+        """Get data loading stats for this :class:`cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe`.
 
         Returns:
-            The :class:`cellxgene_census.ml.pytorch.Stats` object for this
-            :class:`cellxgene_census.ml.pytorch.ExperimentDataPipe`.
+            The :class:`cellxgene_census.experimental.ml.pytorch.Stats` object for this
+            :class:`cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe`.
 
         Lifecycle:
             experimental
@@ -637,7 +637,7 @@ def stats(self) -> Stats:
 
     @property
     def shape(self) -> Tuple[int, int]:
-        """Get the shape of the data that will be returned by this :class:`cellxgene_census.ml.pytorch.ExperimentDataPipe`.
+        """Get the shape of the data that will be returned by this :class:`cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe`.
         This is the number of obs (cell) and var (feature) counts in the returned data. If used in multiprocessing mode
         (i.e. :class:`torch.utils.data.DataLoader` instantiated with num_workers > 0), the obs (cell) count will reflect
         the size of the partition of the data assigned to the active process.
@@ -684,23 +684,23 @@ def experiment_dataloader(
     **dataloader_kwargs: Any,
 ) -> DataLoader:
     """Factory method for :class:`torch.utils.data.DataLoader`. This method can be used to safely instantiate a
-    :class:`torch.utils.data.DataLoader` that works with :class:`cellxgene_census.ml.pytorch.ExperimentDataPipe`,
+    :class:`torch.utils.data.DataLoader` that works with :class:`cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe`,
     since some of the :class:`torch.utils.data.DataLoader` constructor parameters are not applicable when using a
     :class:`torchdata.datapipes.iter.IterDataPipe` (``shuffle``, ``batch_size``, ``sampler``, ``batch_sampler``,
     ``collate_fn``).
 
     Args:
         datapipe:
             An :class:`torchdata.datapipes.iter.IterDataPipe`, which can be an
-            :class:`cellxgene_census.ml.pytorch.ExperimentDataPipe` or any other
+            :class:`cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe` or any other
             :class:`torchdata.datapipes.iter.IterDataPipe` that has been chained to the
-            :class:`cellxgene_census.ml.pytorch.ExperimentDataPipe`.
+            :class:`cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe`.
         num_workers:
             Number of worker processes to use for data loading. If ``0``, data will be loaded in the main process.
         **dataloader_kwargs:
             Additional keyword arguments to pass to the :class:`torch.utils.data.DataLoader` constructor,
             except for ``shuffle``, ``batch_size``, ``sampler``, ``batch_sampler``, and ``collate_fn``, which are not
-            supported when using :class:`cellxgene_census.ml.pytorch.ExperimentDataPipe`.
+            supported when using :class:`cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe`.
 
     Returns:
         A :class:`torch.utils.data.DataLoader`.

diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_highly_variable_genes.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_highly_variable_genes.py
@@ -370,7 +370,7 @@ def get_highly_variable_genes(
         batch_key_func:
             Optional function to create a user-defined batch key. Function will be called once per row in the obs
             dataframe. Function will receive a single argument: a :class:`pandas.Series` containing values specified in
-            the``batch_key`` argument.
+            the ``batch_key`` argument.
 
     Returns:
         :class:`pandas.DataFrame` containing annotations for all ``var`` values specified by the query.
@@ -383,7 +383,7 @@ def get_highly_variable_genes(
 
     Examples:
         Fetch a :class:`pandas.DataFrame` containing var annotations for a subset of the cells matching the
-        ``obs_value_filter`:
+        ``obs_value_filter``:
 
         >>> hvg = get_highly_variable_genes(
                 census,
@@ -397,15 +397,13 @@ def get_highly_variable_genes(
         >>> with cellxgene_census.open_soma(census_version="stable") as census:
                 organism = "mus_musculus"
                 obs_value_filter = "is_primary_data == True and tissue_general == 'lung'"
-
                 # Get the highly variable genes
                 hvg = cellxgene_census.experimental.pp.get_highly_variable_genes(
                     census,
                     organism=organism,
                     obs_value_filter=obs_value_filter,
                     n_top_genes = 500
                 )
-
                 # Fetch AnnData - all cells matching obs_value_filter, just the HVGs
                 hvg_soma_ids = hvg[hvg.highly_variable].index.values
                 adata = cellxgene_census.get_anndata(

diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_stats.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_stats.py
@@ -29,7 +29,7 @@ def mean_variance(
 
     Args:
         query:
-            An :class:`tiledbsoma.ExperimentAxisQuery`, specifying the ``obs``/``var`` selection over which mean and
+            A :class:`tiledbsoma.ExperimentAxisQuery`, specifying the ``obs``/``var`` selection over which mean and
             variance are calculated.
         layer:
             X layer used, e.g., ``"raw"``.