diff --git a/.flake8 b/.flake8 new file mode 100644 index 000000000..19fb3a8f4 --- /dev/null +++ b/.flake8 @@ -0,0 +1,13 @@ +[flake8] +max-line-length = 120 +extend-ignore = + # See https://github.com/PyCQA/pycodestyle/issues/373 + E203, + E402 +exclude = + .git/ + venv/ + tmp/ + .ipynb_checkpoints/ + __pycache__ + diff --git a/.github/workflows/formatting.yml b/.github/workflows/formatting.yml new file mode 100644 index 000000000..5685576fb --- /dev/null +++ b/.github/workflows/formatting.yml @@ -0,0 +1,35 @@ +name: Python Linting + +on: + pull_request: + push: + branches: [main] + +jobs: + lint: + runs-on: ubuntu-latest + defaults: + run: + working-directory: . + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.9" + - run: pip install -U pip + - name: Python Black + run: | + python -m pip install black black[jupyter] + python -m black --check --diff . + - name: Python isort + run: | + python -m pip install isort + python -m isort --check --diff . + - name: Python style with flake8[bugbear] + run: | + python -m pip install flake8-bugbear + python -m flake8 . + - name: Check type annotations mypy + run: | + python -m pip install mypy==0.982 types-setuptools types-requests numpy + python -m mypy . diff --git a/.gitignore b/.gitignore index b6e47617d..ea9e0004e 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,7 @@ dmypy.json # Pyre type checker .pyre/ + +# common user tmp directories +tmp +temp diff --git a/README.md b/README.md index 357d71617..1bc2711e5 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ # CELLxGENE Cell Census -This repository contains documentation and example code related to the Chan Zuckerberg CELLxGENE Cell Census. +**Status**: Unstable, under rapid development + +This repository contains documentation and example code related to the Chan Zuckerberg CELLxGENE Cell Census, and a client (API) package to simplify accessing the Cell Census data. The CZ Cell Census is an aggregation of all public single cell data available in [CELLxGENE Discover](https://cellxgene.cziscience.com/), published in API-accessible formats, including the [SOMA API](https://github.com/single-cell-data/). diff --git a/api/python/cell_census/LICENSE b/api/python/cell_census/LICENSE new file mode 100644 index 000000000..9b6892998 --- /dev/null +++ b/api/python/cell_census/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Chan Zuckerberg Initiative + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/api/python/cell_census/README.md b/api/python/cell_census/README.md
new file mode 100644
index 000000000..0509228ca
--- /dev/null
+++ b/api/python/cell_census/README.md
@@ -0,0 +1,9 @@
+The `cell_census` package provides an API to facilitate use of the CZI Science Cell Census.
+
+**Status**: Unstable, under rapid development
+
+For more information, see the [cell_census repo](https://github.com/chanzuckerberg/cell-census/).
+
+### For More Help
+
+For more help, please file an issue on the repo, or contact us at <cellxgene@chanzuckerberg.com>.
+
+If you believe you have found a security issue, we would appreciate notification. Please send email to <security@chanzuckerberg.com>.
diff --git a/api/python/cell_census/setup.cfg b/api/python/cell_census/setup.cfg
new file mode 100644
index 000000000..02c159d18
--- /dev/null
+++ b/api/python/cell_census/setup.cfg
@@ -0,0 +1,27 @@
+[metadata]
+name = cell_census
+version = attr: cell_census.__version__
+author = Chan Zuckerberg Initiative
+author_email = cellxgene@chanzuckerberg.com
+description = API to simplify use of the CZI Science CELLxGENE Cell Census
+long_description = file: README.md LICENSE
+license = MIT
+url = https://github.com/chanzuckerberg/cell-census
+
+[options]
+python_requires = >= 3.8
+install_requires =
+    numba
+    numpy
+    requests
+    tiledb
+    tiledbsoma
+    typing_extensions
+    s3fs
+    scikit-misc
+package_dir =
+    = src
+packages = find:
+
+[options.packages.find]
+where = src
diff --git a/api/python/cell_census/setup.py b/api/python/cell_census/setup.py
new file mode 100644
index 000000000..7f1a1763c
--- /dev/null
+++ b/api/python/cell_census/setup.py
@@ -0,0 +1,4 @@
+from setuptools import setup
+
+if __name__ == "__main__":
+    setup()
diff --git a/api/python/cell_census/src/cell_census/__init__.py b/api/python/cell_census/src/cell_census/__init__.py
new file mode 100644
index 000000000..29eca5a04
--- /dev/null
+++ b/api/python/cell_census/src/cell_census/__init__.py
@@ -0,0 +1,14 @@
+from .get_anndata import get_anndata
+from .open import download_source_h5ad, get_source_h5ad_uri, open_soma
+from .release_directory import get_directory, get_release_description
+
+__version__ = "0.0.1-dev0"
+
+__all__ = [
+    "download_source_h5ad",
+    "get_anndata",
+    "get_directory",
+    "get_source_h5ad_uri",
+    "get_release_description",
+    "open_soma",
+]
diff --git a/api/python/cell_census/src/cell_census/compute/__init__.py b/api/python/cell_census/src/cell_census/compute/__init__.py
new file mode 100644
index 000000000..1a6326ed6
--- /dev/null
+++ b/api/python/cell_census/src/cell_census/compute/__init__.py
@@ -0,0 +1,7 @@
+from .highly_variable_genes import highly_variable_genes
+from .meanvar import OnlineMatrixMeanVariance
+
+__all__ = [
+    "highly_variable_genes",
+    "OnlineMatrixMeanVariance",
+]
diff --git a/api/python/cell_census/src/cell_census/compute/highly_variable_genes.py b/api/python/cell_census/src/cell_census/compute/highly_variable_genes.py
new file mode 100644
index 000000000..18a400158
--- /dev/null
+++ b/api/python/cell_census/src/cell_census/compute/highly_variable_genes.py
@@ -0,0 +1,94 @@
+import numpy as np
+import pandas as pd
+
+from ..experiment_query import ExperimentQuery
+from .meanvar import OnlineMatrixMeanVariance
+
+
+def highly_variable_genes(query: ExperimentQuery, n_top_genes: int = 10) -> pd.DataFrame:
+    """
+    Acknowledgements: scanpy highly variable genes implementation, github.com/scverse/scanpy
+    """
+    use_prefetch = True
+
+    try:
+        import skmisc.loess
+    except ImportError:
+        raise ImportError("Please install the skmisc package via `pip install --user scikit-misc`")
+
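+    # Pass 1: accumulate per-gene mean and variance online, one Arrow table at a
+    # time, so the full X matrix never needs to be materialized in memory.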
indexer = query.get_indexer() + mvn = OnlineMatrixMeanVariance(query.n_obs, query.n_vars) + for arrow_tbl in query.X("raw", prefetch=use_prefetch): + var_dim = indexer.var_index(arrow_tbl["soma_dim_1"]) + data = arrow_tbl["soma_data"].to_numpy() + mvn.update(var_dim, data) + + u, v = mvn.finalize() + var_df = pd.DataFrame( + index=pd.Index(data=query.var_joinids(), name="soma_joinid"), + data={ + "means": u, + "variances": v, + }, + ) + + estimated_variances = np.zeros((len(var_df),), dtype=np.float64) + not_const = v > 0 + y = np.log10(v[not_const]) + x = np.log10(u[not_const]) + model = skmisc.loess.loess(x, y, span=0.3, degree=2) + model.fit() + estimated_variances[not_const] = model.outputs.fitted_values + reg_std = np.sqrt(10**estimated_variances) + + # A second pass over the data is required because the clip value + # is determined by the first pass + N = query.n_obs + vmax = np.sqrt(N) + clip_val = reg_std * vmax + u + counts_sum = np.zeros((query.n_vars,), dtype=np.float64) # clipped + squared_counts_sum = np.zeros((query.n_vars,), dtype=np.float64) # clipped + for arrow_tbl in query.X("raw", prefetch=use_prefetch): + var_dim = indexer.var_index(arrow_tbl["soma_dim_1"]) + data = arrow_tbl["soma_data"].to_numpy() + # clip + mask = data > clip_val[var_dim] + data = data.copy() + data[mask] = clip_val[var_dim[mask]] + np.add.at(counts_sum, var_dim, data) + np.add.at(squared_counts_sum, var_dim, data**2) + + norm_gene_vars = (1 / ((N - 1) * np.square(reg_std))) * ( + (N * np.square(u)) + squared_counts_sum - 2 * counts_sum * u + ) + norm_gene_vars = norm_gene_vars.reshape(1, -1) + + # argsort twice gives ranks, small rank means most variable + ranked_norm_gene_vars = np.argsort(np.argsort(-norm_gene_vars, axis=1), axis=1) + + # this is done in SelectIntegrationFeatures() in Seurat v3 + ranked_norm_gene_vars = ranked_norm_gene_vars.astype(np.float32) + num_batches_high_var = np.sum((ranked_norm_gene_vars < n_top_genes).astype(int), axis=0) + ranked_norm_gene_vars[ranked_norm_gene_vars >= n_top_genes] = np.nan + ma_ranked = np.ma.masked_invalid(ranked_norm_gene_vars) # type: ignore + median_ranked = np.ma.median(ma_ranked, axis=0).filled(np.nan) # type: ignore + + var_df = var_df.assign( + highly_variable_nbatches=pd.Series(num_batches_high_var, index=var_df.index), + highly_variable_rank=pd.Series(median_ranked, index=var_df.index), + variances_norm=pd.Series(np.mean(norm_gene_vars, axis=0), index=var_df.index), + ) + + sorted_index = ( + var_df[["highly_variable_rank", "highly_variable_nbatches"]] + .sort_values( + ["highly_variable_rank", "highly_variable_nbatches"], + ascending=[True, False], + na_position="last", + ) + .index + ) + var_df["highly_variable"] = False + var_df = var_df.drop(columns=["highly_variable_nbatches"]) + var_df.loc[sorted_index[: int(n_top_genes)], "highly_variable"] = True + return var_df diff --git a/api/python/cell_census/src/cell_census/compute/meanvar.py b/api/python/cell_census/src/cell_census/compute/meanvar.py new file mode 100644 index 000000000..722b12ac2 --- /dev/null +++ b/api/python/cell_census/src/cell_census/compute/meanvar.py @@ -0,0 +1,76 @@ +import numba +import numpy as np +import numpy.typing as npt + + +class OnlineMatrixMeanVariance: + n_samples: int + n_variables: int + + def __init__(self, n_samples: int, n_variables: int): + """ + Compute mean and variance for n_variables over n_samples, encoded + in a COO format. 
Equivalent to:
+            numpy.mean(data, axis=0)
+            numpy.var(data, axis=0)
+        where the input `data` is of shape (n_samples, n_variables)
+        """
+        self.n_samples = n_samples
+        self.n_variables = n_variables
+
+        self.n_a = np.zeros((n_variables,), dtype=np.int32)
+        self.u_a = np.zeros((n_variables,), dtype=np.float64)
+        self.M2_a = np.zeros((n_variables,), dtype=np.float64)
+
+    def update(self, coord_vec: npt.NDArray[np.int64], value_vec: npt.NDArray[np.float32]) -> None:
+        _mean_variance_update(coord_vec, value_vec, self.n_a, self.u_a, self.M2_a)
+
+    def finalize(self) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
+        """
+        Returns a tuple containing the mean and variance
+        """
+        u, M2 = _mean_variance_finalize(self.n_samples, self.n_a, self.u_a, self.M2_a)
+
+        # compute sample variance
+        var = M2 / max(1, (self.n_samples - 1))
+
+        return u, var
+
+
+# TODO: add type signatures to the numba annotations, removing the need for dynamic generation
+
+
+@numba.jit(nopython=True, nogil=True)  # type: ignore[misc] # See https://github.com/numba/numba/issues/7424
+def _mean_variance_update(
+    col_arr: npt.NDArray[np.int64],
+    val_arr: npt.NDArray[np.float32],
+    n: npt.NDArray[np.int32],
+    u: npt.NDArray[np.float64],
+    M2: npt.NDArray[np.float64],
+) -> None:
+    """
+    Incrementally accumulate the mean and the sum of squared distances from the mean,
+    using Welford's online method.
+    """
+    for col, val in zip(col_arr, val_arr):
+        u_prev = u[col]
+        M2_prev = M2[col]
+        n[col] += 1
+        u[col] = u_prev + (val - u_prev) / n[col]
+        M2[col] = M2_prev + (val - u_prev) * (val - u[col])
+
+
+@numba.jit(nopython=True, nogil=True)  # type: ignore[misc] # See https://github.com/numba/numba/issues/7424
+def _mean_variance_finalize(
+    n_samples: int, n_a: npt.NDArray[np.int32], u_a: npt.NDArray[np.float64], M2_a: npt.NDArray[np.float64]
+) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
+    """
+    Finalize the incremental values, accounting for missing elements (due to sparse input).
+    The non-sparse and sparse parts are combined using Chan's parallel adaptation of
+    Welford's method. The code assumes the sparse elements are all zero and ignores those terms.
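+
+    With n = n_a + n_b, and the sparse block contributing u_b == 0 and M2_b == 0,
+    Chan's combination reduces to (this is exactly what the code below computes):
+
+        u  = (n_a * u_a) / n
+        M2 = M2_a + u_a**2 * n_a * n_b / n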
+ """ + n_b = n_samples - n_a + delta = -u_a # assumes u_b == 0 + u = (n_a * u_a) / n_samples + M2 = M2_a + delta**2 * n_a * n_b / n_samples # assumes M2_b == 0 + return u, M2 diff --git a/api/python/cell_census/src/cell_census/experiment_query/__init__.py b/api/python/cell_census/src/cell_census/experiment_query/__init__.py new file mode 100644 index 000000000..09a371d9a --- /dev/null +++ b/api/python/cell_census/src/cell_census/experiment_query/__init__.py @@ -0,0 +1,12 @@ +from .axis import AxisQuery +from .query import ExperimentQuery, experiment_query +from .types import AxisColumnNames +from .util import X_as_series + +__all__ = [ + "experiment_query", + "AxisColumnNames", + "AxisQuery", + "ExperimentQuery", + "X_as_series", +] diff --git a/api/python/cell_census/src/cell_census/experiment_query/anndata.py b/api/python/cell_census/src/cell_census/experiment_query/anndata.py new file mode 100644 index 000000000..b33c5ba68 --- /dev/null +++ b/api/python/cell_census/src/cell_census/experiment_query/anndata.py @@ -0,0 +1,35 @@ +from typing import Dict, Tuple + +import anndata +import pyarrow as pa +import scipy.sparse as sparse + +from .types import ExperimentQueryReadArrowResult + + +def arrow_to_scipy_csr(X: pa.Table, shape: Tuple[int, int]) -> sparse.csr_matrix: + return sparse.csr_matrix((X["soma_data"].to_numpy(), (X["_dim_0"].to_numpy(), X["_dim_1"].to_numpy())), shape=shape) + + +def make_anndata(query_result: ExperimentQueryReadArrowResult) -> anndata.AnnData: + + obs = query_result["obs"] + obs = obs.to_pandas() + obs.index = obs.index.map(str) + + var = query_result["var"] + var = var.to_pandas() + var.index = var.index.map(str) + + shape = (len(obs), len(var)) + + X = query_result.get("X", None) + if X is not None: + X = arrow_to_scipy_csr(X, shape) + + X_layers = query_result.get("X_layers", {}) + layers: Dict[str, sparse.csr_matrix] = {} + for X_layer_name, X_layer_table in X_layers.items(): + layers[X_layer_name] = arrow_to_scipy_csr(X_layer_table, shape) + + return anndata.AnnData(X=X, obs=obs, var=var, layers=(layers if len(layers) else None)) diff --git a/api/python/cell_census/src/cell_census/experiment_query/axis.py b/api/python/cell_census/src/cell_census/experiment_query/axis.py new file mode 100644 index 000000000..15619b074 --- /dev/null +++ b/api/python/cell_census/src/cell_census/experiment_query/axis.py @@ -0,0 +1,64 @@ +from dataclasses import dataclass +from typing import Optional, Tuple, TypedDict, Union + +import numpy as np +import numpy.typing as npt +import pyarrow as pa + +# Type declaration/helpers local to this file +# +Coordinates = Tuple[Union[slice, int, npt.ArrayLike], ...] +ValueFilter = str + +MatrixAxisQuery = TypedDict( + "MatrixAxisQuery", + { + "obs": "AxisQuery", + "var": "AxisQuery", + }, +) + + +@dataclass() +class AxisQuery: + """ + Define a single-axis dataframe query based upon either a value filter predicate or coordinates. 
+
+    Can have value:
+    * None - no query, ie, all data
+    * Coordinates - a set of coordinates on the axis dataframe index (or soma_rowids if a dense dataframe)
+    * A SOMA `value_filter` across columns in the axis dataframe
+
+    Examples:
+    ```
+    AxisQuery()
+    AxisQuery(coords=[0,1,2])
+    AxisQuery(value_filter="tissue == 'lung'")
+    ```
+    """
+
+    value_filter: Optional[str] = None
+    coords: Optional[Coordinates] = None
+
+    def __post_init__(self) -> None:
+        # TODO: Error class
+        if self.value_filter is not None and self.coords is not None:
+            raise Exception("FilterSpec - value_filter or coords may be specified, but not both.")
+
+        if self.coords is None:
+            # A value_filter (or an empty query) defaults to all coordinates
+            self.coords = (slice(None),)
+        else:
+            if not isinstance(self.coords, tuple):
+                raise Exception("FilterSpec - coords must be a tuple of int, slice or numpy.array_like")
+            coords = []
+            for c in self.coords:
+                if isinstance(c, (int, slice)):
+                    coords.append(c)
+                else:
+                    coords.append(pa.array(np.array(c, dtype=np.int64)))
+            self.coords = tuple(coords)
+
+    def is_value_filter(self) -> bool:
+        """Return True if this is a value filter, else False if coordinates"""
+        return self.value_filter is not None
diff --git a/api/python/cell_census/src/cell_census/experiment_query/query.py b/api/python/cell_census/src/cell_census/experiment_query/query.py
new file mode 100644
index 000000000..58b560401
--- /dev/null
+++ b/api/python/cell_census/src/cell_census/experiment_query/query.py
@@ -0,0 +1,459 @@
+import asyncio
+import concurrent.futures
+import contextvars
+import functools
+import inspect
+from contextlib import contextmanager
+from typing import (
+    AsyncIterator,
+    Callable,
+    Generator,
+    Iterator,
+    List,
+    Literal,
+    Optional,
+    Sequence,
+    TypedDict,
+    TypeVar,
+    Union,
+    cast,
+)
+
+import anndata
+import numpy as np
+import numpy.typing as npt
+import pandas as pd
+import pyarrow as pa
+import tiledbsoma as soma
+from typing_extensions import ParamSpec
+
+from .anndata import make_anndata
+from .axis import AxisQuery, MatrixAxisQuery
+from .types import AxisColumnNames, ExperimentQueryReadArrowResult
+
+AxisJoinIds = TypedDict(
+    "AxisJoinIds",
+    {
+        "obs": pa.Array,
+        "var": pa.Array,
+    },
+)
+
+
+class ExperimentQuery:
+    """
+    This is a prototype.
+
+    ExperimentQuery allows easy selection and extraction of data from a single soma.Measurement
+    in a soma.Experiment.
+
+    IMPORTANT: this class is not thread safe.
+
+    IMPORTANT: this query class assumes it can store the full result of both axis dataframe
+    queries in memory, and only provides incremental access to the underlying X NdArray. API
+    features such as `n_obs` and `n_vars` codify this assumption.
+
+    IMPORTANT: you must call `close()` on any instance of this class in order to release
+    underlying resources. It is strongly suggested that the context manager `experiment_query`
+    is used to make this easy/safe.
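+
+    A minimal usage sketch (assuming `exp` is an already-open soma.Experiment with an
+    "RNA" measurement; the filter shown is illustrative):
+
+        with experiment_query(exp, "RNA", obs_query=AxisQuery(value_filter="tissue == 'lung'")) as query:
+            adata = query.read_as_anndata("raw")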
+
+    TODO: see chanzuckerberg/soma-scratch#9
+    """
+
+    experiment: soma.Experiment
+    ms: str
+    _query: MatrixAxisQuery
+    _joinids: AxisJoinIds
+    _indexer: "AxisIndexer"
+    _default_threadpool: concurrent.futures.ThreadPoolExecutor
+
+    def __init__(
+        self,
+        experiment: soma.Experiment,
+        measurement_name: str,
+        *,
+        obs_query: Optional[AxisQuery] = None,
+        var_query: Optional[AxisQuery] = None,
+    ):
+        if not experiment.exists():
+            raise ValueError("Experiment does not exist")
+        if measurement_name not in experiment.ms:
+            raise ValueError("Measurement does not exist in the experiment")
+
+        self.experiment = experiment
+        self.ms = measurement_name
+
+        self._query = {
+            "obs": obs_query if obs_query is not None else AxisQuery(coords=(slice(None),)),
+            "var": var_query if var_query is not None else AxisQuery(coords=(slice(None),)),
+        }
+        self._joinids = {
+            "obs": None,
+            "var": None,
+        }
+        self._indexer = AxisIndexer(self)
+
+        # TODO: user should be able to set this, a la asyncio loop.set_default_executor()
+        self._default_threadpool = concurrent.futures.ThreadPoolExecutor()
+
+    def close(self) -> None:
+        """
+        Clean up and close all resources. This must be called or the thread pool
+        will not be released, etc.
+        """
+        self._default_threadpool.shutdown()
+
+    def _read_axis_dataframe(
+        self,
+        axis: Literal["obs", "var"],
+        axis_df: soma.DataFrame,
+        *,
+        column_names: Optional[Sequence[str]],
+    ) -> pa.Table:
+        """
+        Read the specified axis. Will load and save the resulting soma_joinids for that
+        axis if they are not already known.
+        """
+        query = self._query[axis]
+        need_joinids = self._joinids[axis] is None
+
+        query_columns = column_names
+        if need_joinids and column_names is not None and "soma_joinid" not in column_names:
+            query_columns = ["soma_joinid"] + list(column_names)
+
+        tbl = axis_df.read_all(ids=query.coords, value_filter=query.value_filter, column_names=query_columns)
+
+        if need_joinids:
+            self._joinids[axis] = tbl.column("soma_joinid").combine_chunks()
+            assert self._joinids[axis] is not None
+
+        if column_names is not None:
+            tbl = tbl.select(column_names)
+        return tbl
+
+    def _read_axis_joinids(self, axis: Literal["obs", "var"], axis_df: soma.DataFrame) -> pa.Array:
+        if self._joinids[axis] is None:
+            self._read_axis_dataframe(axis, axis_df, column_names=["soma_joinid"])
+        return self._joinids[axis]
+
+    def obs(self, *, column_names: Optional[Sequence[str]] = None) -> pa.Table:
+        """Return obs as an Arrow table."""
+        return self._read_axis_dataframe("obs", self.experiment.obs, column_names=column_names)
+
+    def var(self, *, column_names: Optional[Sequence[str]] = None) -> pa.Table:
+        """Return var as an Arrow table."""
+        return self._read_axis_dataframe("var", self.experiment.ms[self.ms].var, column_names=column_names)
+
+    def obs_joinids(self) -> pa.Array:
+        return self._read_axis_joinids("obs", self.experiment.obs)
+
+    def var_joinids(self) -> pa.Array:
+        return self._read_axis_joinids("var", self.experiment.ms[self.ms].var)
+
+    @property
+    def n_obs(self) -> int:
+        return len(self.obs_joinids())
+
+    @property
+    def n_vars(self) -> int:
+        return len(self.var_joinids())
+
+    def _fetchX(self, X: soma.SparseNdArray, prefetch: bool = False) -> Iterator[pa.Table]:
+        assert self._joinids["obs"] is not None
+        assert self._joinids["var"] is not None
+
+        obs_joinids = self._joinids["obs"]
+        var_joinids = self._joinids["var"]
+
+        if len(obs_joinids) == 0 or len(var_joinids) == 0:
+            # yield (not return) the empty table, so callers always receive an iterator
+            yield pa.Table.from_pylist([], schema=X.schema)
+            return
+
+        if not prefetch:
+            # yield for clarity
+            yield
from cast(Iterator[pa.Table], X.read_table((obs_joinids, var_joinids))) + + else: + # prefetch + fn = wrap_generator(X.read_table((obs_joinids, var_joinids))) + _prefetch_future = self._default_threadpool.submit(fn) + while True: + value, done = _prefetch_future.result() + if done: + return + assert value is not None + _prefetch_future = self._default_threadpool.submit(fn) + yield value + + def X(self, layer: str, prefetch: bool = False) -> Iterator[pa.Table]: + """ + Return X as an iterator of Arrow Tables. + """ + if not layer: + raise ValueError("Must specify X layer") + if layer not in self.experiment.ms[self.ms].X: + raise ValueError("Unknown X layer") + + X = self.experiment.ms[self.ms].X[layer] + if X.soma_type != "SOMASparseNdArray": + raise NotImplementedError("Dense array unsupported") + + futures = [] + if not self._joinids["obs"]: + futures.append(self._default_threadpool.submit(self.obs_joinids)) + if not self._joinids["var"]: + futures.append(self._default_threadpool.submit(self.var_joinids)) + if futures: + concurrent.futures.wait(futures) + + yield from self._fetchX(X, prefetch=prefetch) + + def read( + self, + X_name: str, + *, + use_position_indexing: bool = False, + column_names: Optional[AxisColumnNames] = None, + X_layers: Optional[List[str]] = None, + ) -> ExperimentQueryReadArrowResult: + """ + Read the _entire_ query result into Arrow Tables. Low-level routine + intended to be the basis for exporting to other in-core formats, such + as AnnData. + """ + X_collection = self.experiment.ms[self.ms].X + X_layers = [] if X_layers is None else X_layers + all_X_names = [X_name] + X_layers + for _xname in all_X_names: + if not isinstance(_xname, str) or not _xname: + raise ValueError("X layer names must be specified as a string.") + if _xname not in X_collection: + raise ValueError("Unknown X layer name") + # TODO: dense array slicing + if X_collection[_xname].soma_type != "SOMASparseNdArray": + raise NotImplementedError("Dense array unsupported") + + if column_names is None: + column_names = {"obs": None, "var": None} + if "obs" not in column_names: + column_names["obs"] = None + if "var" not in column_names: + column_names["var"] = None + + futures = ( + self._default_threadpool.submit( + self._read_axis_dataframe, "obs", self.experiment.obs, column_names=column_names["obs"] + ), + self._default_threadpool.submit( + self._read_axis_dataframe, "var", self.experiment.ms[self.ms].var, column_names=column_names["var"] + ), + ) + concurrent.futures.wait(futures) + obs_table, var_table = (f.result() for f in futures) + + X_tables = { + _xname: pa.concat_tables(self._fetchX(X_collection[_xname], prefetch=True)) for _xname in all_X_names + } + if use_position_indexing: + X_tables = self._rewrite_X_for_positional_indexing(X_tables) + + X = X_tables.pop(X_name) + query_result: ExperimentQueryReadArrowResult = {"obs": obs_table, "var": var_table, "X": X} + if len(X_layers) > 0: + assert len(X_layers) == len(X_tables) + query_result["X_layers"] = X_tables + + return query_result + + def read_as_anndata( + self, + X_name: str, + *, + column_names: Optional[AxisColumnNames] = None, + X_layers: Optional[List[str]] = None, + ) -> anndata.AnnData: + """ + Execute the query and return result as an AnnData in-memory object. 
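+
+        A brief sketch (argument values are illustrative):
+
+            adata = query.read_as_anndata("raw", column_names={"obs": ["cell_type"], "var": ["feature_name"]})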
+ """ + query_result = self.read(X_name, column_names=column_names, X_layers=X_layers, use_position_indexing=True) + return make_anndata(query_result) + + def _rewrite_X_for_positional_indexing(self, X_tables: dict[str, pa.Table]) -> dict[str, pa.Table]: + """ + This is a private convenience function to convert axis dataframe to X matrix joins + from `soma_joinid`-based joins to positionally indexed joins (like AnnData uses). + + Input is organized as: + obs[i] annotates X[ obs[i].soma_joinid, : ] + and + var[j] annotates X[ :, var[j].soma_joinid ] + + Output is organized as: + obs[i] annotates X[i, :] + and + var[j] annotates X[:, j] + + In addition, the `soma_joinid` column is dropped from the axis dataframes. + """ + new_X_tables = {} + indexer = self.get_indexer() + for X_name, X_table in X_tables.items(): + new_X_tables[X_name] = pa.Table.from_arrays( + ( + indexer.obs_index(X_table["soma_dim_0"]), + indexer.var_index(X_table["soma_dim_1"]), + X_table["soma_data"].to_numpy(), # as a side effect, consolidates chunks + ), + names=("_dim_0", "_dim_1", "soma_data"), + ) + return new_X_tables + + def get_async(self) -> "AsyncExperimentQuery": + return AsyncExperimentQuery(self) + + def get_indexer(self) -> "AxisIndexer": + return self._indexer + + +@contextmanager +def experiment_query( + experiment: soma.Experiment, + measurement_name: str, + *, + obs_query: Optional[AxisQuery] = None, + var_query: Optional[AxisQuery] = None, +) -> Iterator[ExperimentQuery]: + """ + Context manager which simplifies use of the query by ensuring that + query.close() is called. + """ + query = ExperimentQuery(experiment, measurement_name, obs_query=obs_query, var_query=var_query) + yield query + query.close() + + +class AsyncExperimentQuery: + """ + An async proxy for ExperimentQuery, allowing use with coroutines + """ + + query: ExperimentQuery + + def __init__(self, query: ExperimentQuery): + self.query = query + + def close(self) -> None: + self.query.close() + + @property + def n_obs(self) -> int: + return self.query.n_obs + + @property + def n_vars(self) -> int: + return self.query.n_vars + + async def obs(self, *, column_names: Optional[Sequence[str]] = None) -> AsyncIterator[pa.Table]: + return await to_thread(self.query.obs, column_names=column_names) + + async def var(self, *, column_names: Optional[Sequence[str]] = None) -> AsyncIterator[pa.Table]: + return await to_thread(self.query.var, column_names=column_names) + + async def obs_joinids(self) -> pa.Array: + if self.query._joinids["obs"] is not None: + return self.query._joinids["obs"] + return await to_thread(self.query.obs_joinids) + + async def var_joinids(self) -> pa.Array: + if self.query._joinids["var"] is not None: + return self.query._joinids["var"] + return await to_thread(self.query.var_joinids) + + async def X(self, layer: str, prefetch: bool = False) -> AsyncIterator[pa.Table]: + chunk: pa.Table + async for chunk in async_iter((i for i in self.query.X(layer, prefetch))): + yield chunk + + +T = TypeVar("T") + + +async def async_iter(gen: Generator[T, None, None]) -> AsyncIterator[T]: + """ + Convert a generator into an async coroutine + """ + fn = wrap_generator(gen) + while True: + value, done = await to_thread(fn) + if done: + return + assert value is not None + yield value + + +def wrap_generator(gen: Generator[T, None, None]) -> Callable[[], tuple[Optional[T], bool]]: + """ + Wrap a generator, making it a "normal" function that is amenable + to running in a thread. 
Each time it is called, it returns a tuple:
+        If there is another value: (next_value, False)
+        If end of iteration: (None, True)
+    """
+    assert inspect.isgenerator(gen)
+
+    def _next() -> tuple[Optional[T], bool]:
+        try:
+            value = next(gen)
+            return value, False
+        except StopIteration:
+            return None, True
+
+    return _next
+
+
+_P = ParamSpec("_P")
+_R = TypeVar("_R")
+
+
+async def to_thread(__func: Callable[_P, _R], *args: _P.args, **kwargs: _P.kwargs) -> _R:
+    """
+    Reimplementation of asyncio.to_thread, which was introduced in Py 3.9. Added
+    here for support on earlier versions of Python.
+
+    See https://docs.python.org/3/library/asyncio-task.html#asyncio.to_thread
+    """
+    loop = asyncio.events.get_running_loop()
+    ctx = contextvars.copy_context()
+    func_call = cast(Callable[..., _R], functools.partial(ctx.run, __func, *args, **kwargs))
+    return await loop.run_in_executor(None, func_call)
+
+
+class AxisIndexer:
+    """
+    Given a query, provides index-building services for the obs/var axes.
+    """
+
+    query: ExperimentQuery
+    _obs_index: pd.Index
+    _var_index: pd.Index
+
+    def __init__(self, query: Union[ExperimentQuery, AsyncExperimentQuery]):
+        if isinstance(query, AsyncExperimentQuery):
+            query = query.query
+
+        self.query = query
+        self._obs_index = None
+        self._var_index = None
+
+    def obs_index(self, coords: Union[pa.Array, pa.ChunkedArray, npt.NDArray[np.int64]]) -> npt.NDArray[np.intp]:
+        if not isinstance(coords, np.ndarray):
+            coords = coords.to_numpy()
+        if self._obs_index is None:
+            self._obs_index = pd.Index(data=self.query.obs_joinids().to_numpy())
+        return cast(npt.NDArray[np.intp], self._obs_index.get_indexer(coords))
+
+    def var_index(self, coords: Union[pa.Array, pa.ChunkedArray, npt.NDArray[np.int64]]) -> npt.NDArray[np.intp]:
+        if not isinstance(coords, np.ndarray):
+            coords = coords.to_numpy()
+        if self._var_index is None:
+            self._var_index = pd.Index(data=self.query.var_joinids().to_numpy())
+        return cast(npt.NDArray[np.intp], self._var_index.get_indexer(coords))
diff --git a/api/python/cell_census/src/cell_census/experiment_query/types.py b/api/python/cell_census/src/cell_census/experiment_query/types.py
new file mode 100644
index 000000000..6ce9c57ed
--- /dev/null
+++ b/api/python/cell_census/src/cell_census/experiment_query/types.py
@@ -0,0 +1,32 @@
+"""
+Types global to this module
+"""
+from typing import Optional, Sequence, TypedDict
+
+import pandas as pd
+import pyarrow as pa
+
+# Sadly, you can't define a generic TypedDict....
+
+
+class ExperimentQueryReadArrowResult(TypedDict, total=False):
+    obs: pa.Table
+    var: pa.Table
+    X: pa.Table
+    X_layers: dict[str, pa.Table]
+
+
+class ExperimentQueryReadPandasResult(TypedDict, total=False):
+    obs: pd.DataFrame
+    var: pd.DataFrame
+    X: pd.DataFrame
+    X_layers: dict[str, pd.DataFrame]
+
+
+AxisColumnNames = TypedDict(
+    "AxisColumnNames",
+    {
+        "obs": Optional[Sequence[str]],  # None is all
+        "var": Optional[Sequence[str]],
+    },
+)
diff --git a/api/python/cell_census/src/cell_census/experiment_query/util.py b/api/python/cell_census/src/cell_census/experiment_query/util.py
new file mode 100644
index 000000000..fd7471a49
--- /dev/null
+++ b/api/python/cell_census/src/cell_census/experiment_query/util.py
@@ -0,0 +1,19 @@
+import pandas as pd
+import pyarrow as pa
+
+
+def X_as_series(tbl: pa.Table) -> pd.Series:
+    """
+    Convert SOMA 2D data from an Arrow Table to a Pandas Series.
+
+    NOTE: this is not zero copy.
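+
+    For example, COO entries (soma_dim_0=0, soma_dim_1=2, soma_data=1.0) and
+    (1, 0, 3.0) become a sparse Series with MultiIndex [(0, 2), (1, 0)] and
+    values [1.0, 3.0].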
+ """ + data = tbl["soma_data"].to_numpy() + dim_0 = tbl["soma_dim_0"].to_numpy() + dim_1 = tbl["soma_dim_1"].to_numpy() + return pd.Series( + data, + pd.MultiIndex.from_arrays((dim_0, dim_1), names=("soma_dim_0", "soma_dim_1")), + dtype=pd.SparseDtype(data.dtype, fill_value=0), + name="soma_data", + ) diff --git a/api/python/cell_census/src/cell_census/get_anndata.py b/api/python/cell_census/src/cell_census/get_anndata.py new file mode 100644 index 000000000..3701b39d7 --- /dev/null +++ b/api/python/cell_census/src/cell_census/get_anndata.py @@ -0,0 +1,119 @@ +import re +from typing import List, Optional, TypedDict, Union + +import anndata +import tiledbsoma as soma + +from .experiment_query import AxisColumnNames, AxisQuery, experiment_query + +ObsQuery = TypedDict( + "ObsQuery", + { + "assay": Optional[Union[str, List[str]]], + "assay_ontology_term_id": Optional[Union[str, List[str]]], + "cell_type": Optional[Union[str, List[str]]], + "cell_type_ontology_term_id": Optional[Union[str, List[str]]], + "development_stage": Optional[Union[str, List[str]]], + "development_stage_ontology_term_id": Optional[Union[str, List[str]]], + "disease": Optional[Union[str, List[str]]], + "disease_ontology_term_id": Optional[Union[str, List[str]]], + "donor_id": Optional[Union[str, List[str]]], + "is_primary_data": Optional[bool], + "self_reported_ethnicity": Optional[Union[str, List[str]]], + "self_reported_ethnicity_ontology_term_id": Optional[Union[str, List[str]]], + "sex": Optional[Union[str, List[str]]], + "sex_ontology_term_id": Optional[Union[str, List[str]]], + "suspension_type": Optional[Union[str, List[str]]], + "tissue": Optional[Union[str, List[str]]], + "tissue_ontology_term_id": Optional[Union[str, List[str]]], + }, +) + +VarQuery = TypedDict( + "VarQuery", + { + "feature_id": Optional[Union[str, List[str]]], + "feature_name": Optional[Union[str, List[str]]], + }, +) + + +def _build_query(query_defn: Optional[Union[ObsQuery, VarQuery]] = None) -> Optional[AxisQuery]: + """ + Build a AxisQuery value filter from the user-defined query parameters. + """ + if query_defn is None: + return None + + query_conditions = [] + for name, val in query_defn.items(): + if isinstance(val, str): + query_conditions.append(f"{name} == '{val}'") + elif isinstance(val, list): + query_conditions.append(f"{name} in {val}") + else: + raise TypeError("Query must be string or list of strings") + + if len(query_conditions) == 0: + return None + + return AxisQuery(value_filter=" and ".join(query_conditions)) + + +def get_anndata( + census: soma.Collection, + organism: str, + measurement_name: str = "RNA", + X_name: str = "raw", + obs_query: Optional[ObsQuery] = None, + var_query: Optional[VarQuery] = None, + column_names: Optional[AxisColumnNames] = None, +) -> anndata.AnnData: + """ + Convience wrapper around soma.Experiment query, to build and execute a query, + and return it as an AnnData object. + + Parameters + ---------- + census : soma.Collection + The census object, usually returned by `cell_census.open_soma()` + organism : str + The organism to query, usually one of "Homo sapiens" or "Mus musculus" + measurement_name : str, default 'RNA' + The measurement object to query + X_name : str, default "raw" + The X layer to query + obs_query : dict[str, Union[str, List[str]]] + Obs (cell) query definition. Dict where keys are column names, and value is a + string or list of strings to match. All query terms must match (AND query). + var_query : dict[str, Union[str, List[str]]] + Var (gene) query definition. 
Dict where keys are column names, and value is a
+        string or list of strings to match. All query terms must match (AND query).
+    column_names: dict[Literal['obs', 'var'], List[str]]
+        Columns to fetch for the obs and var dataframes.
+
+    Returns
+    -------
+    anndata.AnnData - containing the census slice
+
+    Examples
+    --------
+    >>> get_anndata(census, "Mus musculus", obs_query={"tissue": "brain"})
+
+    >>> get_anndata(census, "Homo sapiens", column_names={"obs": ["tissue"]})
+
+    """
+
+    # lower/snake-case the organism name to find the experiment name
+    exp_name = re.sub(r"[ ]+", "_", organism).lower()
+
+    if exp_name not in census["census_data"]:
+        raise ValueError(f"Unknown organism {organism} - does not exist")
+    exp = census["census_data"][exp_name]
+    if exp.soma_type != "SOMAExperiment":
+        raise ValueError(f"Unknown organism {organism} - not a SOMA Experiment")
+
+    _obs_query = _build_query(obs_query)
+    _var_query = _build_query(var_query)
+    with experiment_query(exp, measurement_name=measurement_name, obs_query=_obs_query, var_query=_var_query) as query:
+        return query.read_as_anndata(X_name=X_name, column_names=column_names)
diff --git a/api/python/cell_census/src/cell_census/open.py b/api/python/cell_census/src/cell_census/open.py
new file mode 100644
index 000000000..543792bbb
--- /dev/null
+++ b/api/python/cell_census/src/cell_census/open.py
@@ -0,0 +1,113 @@
+import os.path
+import urllib.parse
+from typing import Optional
+
+import s3fs
+import tiledb
+import tiledbsoma as soma
+
+from .release_directory import CensusLocator, CensusReleaseDescription, get_release_description
+from .util import uri_join
+
+# TODO: temporary work-around for lack of context/config in tiledbsoma. Replace with soma
+# `platform_config` when available.
+DEFAULT_TILEDB_CONFIGURATION = {
+    # https://docs.tiledb.com/main/how-to/configuration#configuration-parameters
+    "py.init_buffer_bytes": 1 * 1024**3,
+    "soma.init_buffer_bytes": 1 * 1024**3,
+    "py.deduplicate": "true",
+}
+
+
+def _open_soma(description: CensusReleaseDescription) -> soma.Collection:
+    locator = description["soma"]
+    tiledb_config = {**DEFAULT_TILEDB_CONFIGURATION}
+    s3_region = locator.get("s3_region", None)
+    if s3_region is not None:
+        tiledb_config["vfs.s3.region"] = locator["s3_region"]
+    return soma.Collection(uri=locator["uri"], ctx=tiledb.Ctx(tiledb_config))
+
+
+def open_soma(*, census_version: Optional[str] = "latest", uri: Optional[str] = None) -> soma.Collection:
+    """
+    Open the Cell Census by version (name) or URI, returning a soma.Collection containing
+    the top-level census.
+
+    TODO: add platform_config hook when it is further defined, allowing config overrides.
+    """
+
+    if uri is not None:
+        return soma.Collection(uri=uri, ctx=tiledb.Ctx(DEFAULT_TILEDB_CONFIGURATION))
+
+    if census_version is None:
+        raise ValueError("Must specify either a cell census version or an explicit URI.")
+
+    description = get_release_description(census_version)  # raises
+    return _open_soma(description)
+
+
+def get_source_h5ad_uri(dataset_id: str, *, census_version: str = "latest") -> CensusLocator:
+    """
+    Open the named version of the census, and return the URI for the dataset_id.
+
+    This does not guarantee that the H5AD exists or is accessible to the user.
+
+    Raises if dataset_id or census_version are unknown.
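+
+    A sketch of the returned locator shape (the URI value shown is illustrative only):
+
+        >>> get_source_h5ad_uri("...")
+        {'uri': 's3://.../<dataset_h5ad_path>', 's3_region': 'us-west-2'}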
+ """ + description = get_release_description(census_version) # raises + census = _open_soma(description) + dataset = census["census_info"]["datasets"].read_as_pandas_all(value_filter=f"dataset_id == '{dataset_id}'") + if len(dataset) == 0: + raise KeyError("Unknown dataset_id") + + locator = description["h5ads"].copy() + h5ads_base_uri = locator["uri"] + dataset_h5ad_path = dataset.dataset_h5ad_path.iloc[0] + locator["uri"] = uri_join(h5ads_base_uri, dataset_h5ad_path) + return locator + + +def download_source_h5ad(dataset_id: str, to_path: str, *, census_version: str = "latest") -> None: + """ + Download the source H5AD dataset, for the given dataset_id, to the user-specified + file name. + + Will raise an error if the path already exists (i.e., will not overwrite + an existing file), or is not a file. + + Parameters + ---------- + dataset_id : str + Fetch the source (original) H5AD associated with this dataset_id. + to_path : str + The file name where the downloaded H5AD will be written. Must not already exist. + census_version : str + The census version tag. Defaults to ``latest``. + + Returns + ------- + None + + See Also + -------- + get_source_h5ad_uri : Look up the location of the source H5AD. + + Examples + -------- + >>> download_source_h5ad("8e47ed12-c658-4252-b126-381df8d52a3d", to_path="/tmp/data.h5ad") + + """ + if os.path.exists(to_path): + raise ValueError("Path exists - will not overwrite existing file.") + if to_path.endswith("/"): + raise ValueError("Specify to_path as a file name, not a directory name.") + + locator = get_source_h5ad_uri(dataset_id, census_version=census_version) + protocol = urllib.parse.urlparse(locator["uri"]).scheme + assert protocol == "s3" + + fs = s3fs.S3FileSystem( + anon=True, + cache_regions=True, + ) + fs.get_file(locator["uri"], to_path) diff --git a/api/python/cell_census/src/cell_census/release_directory.py b/api/python/cell_census/src/cell_census/release_directory.py new file mode 100644 index 000000000..a9cea236f --- /dev/null +++ b/api/python/cell_census/src/cell_census/release_directory.py @@ -0,0 +1,67 @@ +from typing import Dict, Optional, TypedDict, Union, cast + +import requests + +""" +The following types describe the expected directory of Cell Census builds, used +to bootstrap all data location requests. +""" +CensusReleaseTag = str # name or version of census, eg, "release-99" or "2022-10-01-test" +CensusLocator = TypedDict( + "CensusLocator", + { + "uri": str, # resource URI + "s3_region": Optional[str], # if an S3 URI, has optional region + }, +) +CensusReleaseDescription = TypedDict( + "CensusReleaseDescription", + { + "release_date": Optional[str], # date of release, optional + "release_build": str, # date of build + "soma": CensusLocator, + "h5ads": CensusLocator, + }, +) +CensusDirectory = Dict[CensusReleaseTag, Union[CensusReleaseTag, CensusReleaseDescription]] + + +# URL for the default top-level directory of all public data, formatted as a CensusDirectory +CELL_CENSUS_RELEASE_DIRECTORY_URL = "https://s3.us-west-2.amazonaws.com/cellxgene-data-public/cell-census/release.json" + + +def get_release_description(tag: str) -> CensusReleaseDescription: + """Get release description for given tag. 
Raises KeyError if unknown tag value."""
+    census_directory = get_directory()
+    description = census_directory.get(tag, None)
+    if description is None:
+        raise KeyError(f"Unable to locate cell census version: {tag}.")
+    return description
+
+
+def get_directory() -> Dict[CensusReleaseTag, CensusReleaseDescription]:
+    """
+    Get the directory of cell census releases available.
+    """
+    response = requests.get(CELL_CENSUS_RELEASE_DIRECTORY_URL)
+    response.raise_for_status()
+    directory: CensusDirectory = cast(CensusDirectory, response.json())
+
+    # Resolve all aliases for easier use
+    for tag in list(directory.keys()):
+        # Strings are aliases for other tags
+        points_at = directory[tag]
+        while isinstance(points_at, str):
+            # resolve aliases
+            if points_at not in directory:
+                # oops, dangling pointer -- drop original tag
+                directory.pop(tag)
+                break
+
+            points_at = directory[points_at]
+
+        if isinstance(points_at, dict):
+            directory[tag] = points_at
+
+    # Cast is safe, as we have removed all tag aliases
+    return cast(Dict[CensusReleaseTag, CensusReleaseDescription], directory)
diff --git a/api/python/cell_census/src/cell_census/util.py b/api/python/cell_census/src/cell_census/util.py
new file mode 100644
index 000000000..360d61eab
--- /dev/null
+++ b/api/python/cell_census/src/cell_census/util.py
@@ -0,0 +1,15 @@
+import urllib.parse
+
+
+def uri_join(base: str, url: str) -> str:
+    """
+    Like urllib.parse.urljoin, but is not confused by the s3:// scheme.
+    """
+    p_url = urllib.parse.urlparse(url)
+    if p_url.netloc:
+        return url
+
+    p_base = urllib.parse.urlparse(base)
+    path = urllib.parse.urljoin(p_base.path, p_url.path)
+    parts = [p_base.scheme, p_base.netloc, path, p_url.params, p_url.query, p_url.fragment]
+    return urllib.parse.urlunparse(parts)
diff --git a/api/python/notebooks/README.md b/api/python/notebooks/README.md
new file mode 100644
index 000000000..ea39208ff
--- /dev/null
+++ b/api/python/notebooks/README.md
@@ -0,0 +1,58 @@
+# ReadMe
+
+Demonstration notebooks for the CELLxGENE Cell Census
+
+This is a quick start on how to run the notebooks. It is Linux-flavored.
+
+## Dependencies
+
+You must be on a Linux or MacOS system, with the following installed:
+* Python 3.9+
+* C++ 17 build tools
+* cmake 3.21 or later
+* git
+* Jupyter or some other means of running notebooks (e.g., vscode)
+
+For now, it is recommended that you do all this on a host with sufficient memory,
+and a high-bandwidth connection to AWS S3 in the us-west-2 region, e.g., an m6i.16xlarge.
+If you utilize AWS, an Ubuntu 20 or 22 AMI is recommended (the stock AWS AMI should
+work fine, but has not been tested).
+
+I also recommend you use a `d` instance type, and mount all of the NVMe drives as swap,
+as it will keep you from running out of RAM.
+
+## Step 1: Clone Repos
+
+On your target host:
+1. Make a new working directory and `cd` into it
+2. Clone both TileDB-SOMA and cell-census:
+```bash
+$ git clone https://github.com/single-cell-data/TileDB-SOMA.git
+$ git clone https://github.com/chanzuckerberg/cell-census.git
+```
+
+## Step 2: Set up Python environment
+1. In your working directory, make and activate a virtual environment.
+```shell
+ $ python -m venv ./venv
+ $ source ./venv/bin/activate
+```
+2. Build and install SOMA into your virtual environment by following the instructions in `TileDB-SOMA/apis/python/README.md`
+3. Install the `cell_census` package:
+```shell
+ $ pip install -e cell-census/api/python/cell_census/
+```
+4.
Install packages needed to run notebooks:
+```shell
+ $ pip install scikit-misc
+```
+
+## Verify your installation
+Check that your installation works - this may take a few seconds, as it loads metadata from S3:
+```shell
+$ python -c 'import cell_census; print(cell_census.open_soma().soma_type)'
+SOMACollection
+```
+
+## Run notebooks
+Run the notebooks, which you can find in the `cell-census/api/python/notebooks` directory.
diff --git a/api/python/notebooks/census_axis_query.ipynb b/api/python/notebooks/census_axis_query.ipynb
new file mode 100644
index 000000000..9730a3fac
--- /dev/null
+++ b/api/python/notebooks/census_axis_query.ipynb
@@ -0,0 +1,281 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Axis Query Example\n",
+    "\n",
+    "_Goal:_ demonstrate basic axis metadata handling using Pandas.\n",
+    "\n",
+    "The CZ Cell Census stores obs (cell) metadata in a SOMA DataFrame, which can be queried and read as a Pandas DataFrame. The Cell Census also has a convenience package which simplifies opening the census.\n",
+    "\n",
+    "A Pandas DataFrame is an in-memory object. Take care that queries are small enough for results to fit in memory.\n",
+    "\n",
+    "## Open the census\n",
+    "\n",
+    "The `cell_census` Python package contains a convenient API to open the latest version of the Cell Census."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import cell_census\n",
+    "\n",
+    "census = cell_census.open_soma()\n",
+    "human = census[\"census_data\"][\"homo_sapiens\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Summarize Census cell metadata\n",
+    "\n",
+    "Tips:\n",
+    "\n",
+    "- `read_as_pandas()` and `read_as_pandas_all()` return standard Pandas DataFrame objects, allowing the use of the Pandas API.\n",
+    "- Queries will be much faster if you request only the DataFrame columns required for your analysis (e.g., `column_names=[\"cell_type_ontology_term_id\"]`).\n",
+    "- You can also further refine query results by using a `value_filter`.\n",
+    "\n",
+    "### Example 1 - Summarize all cell types\n",
+    "\n",
+    "This example reads the cell metadata (obs) into a Pandas DataFrame, and summarizes it in a variety of ways using the Pandas API."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "There are 545 cell types in the Cell Census! The first 10 are: ['CL:1000329', 'CL:0000787', 'CL:0000798', 'CL:0000909', 'CL:0000151', 'CL:1000348', 'CL:0000064', 'CL:0000576', 'CL:0000451', 'CL:0000898']\n",
+      "\n",
+      "The top 10 cell types and their counts are:\n",
+      "CL:0000679    1889047\n",
+      "CL:0000235    1374219\n",
+      "CL:0000624    1286344\n",
+      "CL:0000860    1272977\n",
+      "CL:0000625    1244993\n",
+      "CL:0000623    1031420\n",
+      "CL:0000236     945552\n",
+      "CL:0001064     797557\n",
+      "CL:0000057     741330\n",
+      "CL:0000746     731139\n",
+      "Name: cell_type_ontology_term_id, dtype: int64\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Read into a pandas dataframe.\n",
+    "obs_df = human.obs.read_as_pandas_all(column_names=[\"cell_type_ontology_term_id\"])\n",
+    "\n",
+    "# Use Pandas API to find all unique values in the `cell_type_ontology_term_id` column.\n",
+    "unique_cell_type_ontology_term_id = obs_df.cell_type_ontology_term_id.unique()\n",
+    "\n",
+    "# Display only the first 10, as there are a LOT!\n",
+    "print(\n",
+    "    f\"There are {len(unique_cell_type_ontology_term_id)} cell types in the Cell Census!
The first 10 are:\",\n",
+    "    unique_cell_type_ontology_term_id[0:10].tolist(),\n",
+    ")\n",
+    "\n",
+    "# Using Pandas API, count the instances of each cell type term and return the top 10.\n",
+    "top_10 = obs_df.cell_type_ontology_term_id.value_counts()[0:10]\n",
+    "print(\"\\nThe top 10 cell types and their counts are:\")\n",
+    "print(top_10)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Summarize a subset of cell types, selected with a `value_filter`\n",
+    "\n",
+    "This example utilizes a SOMA \"value filter\" to read the subset of cells with `tissue_ontology_term_id` equal to `UBERON:0002048` (lung tissue), and summarizes the query result using Pandas."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "There are 176 cell types in the Cell Census where tissue_ontology_term_id == UBERON:0002048! The first 10 are: ['CL:0002325', 'CL:0000064', 'CL:0000875', 'CL:0000236', 'CL:0000623', 'CL:0000235', 'CL:0000084', 'CL:0000003', 'CL:0000186', 'CL:0000115']\n",
+      "\n",
+      "Top 10 cell types where tissue_ontology_term_id == UBERON:0002048\n",
+      "CL:0000235    514828\n",
+      "CL:0000583    317503\n",
+      "CL:0000624    265512\n",
+      "CL:0000625    248053\n",
+      "CL:0000003    168203\n",
+      "CL:0000623    164002\n",
+      "CL:0000860    160365\n",
+      "CL:0001064    149067\n",
+      "CL:0002063    142612\n",
+      "CL:0002632    126058\n",
+      "Name: cell_type_ontology_term_id, dtype: int64\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Count cell_type occurrences for cells with tissue == 'lung'\n",
+    "\n",
+    "# Read cell_type terms for cells which have a specific tissue term\n",
+    "LUNG_TISSUE = \"UBERON:0002048\"\n",
+    "\n",
+    "obs_df = human.obs.read_as_pandas_all(\n",
+    "    column_names=[\"cell_type_ontology_term_id\"],\n",
+    "    value_filter=f\"tissue_ontology_term_id == '{LUNG_TISSUE}'\",\n",
+    ")\n",
+    "\n",
+    "# Use Pandas API to find all unique values in the `cell_type_ontology_term_id` column.\n",
+    "unique_cell_type_ontology_term_id = obs_df.cell_type_ontology_term_id.unique()\n",
+    "\n",
+    "print(\n",
+    "    f\"There are {len(unique_cell_type_ontology_term_id)} cell types in the Cell Census where tissue_ontology_term_id == {LUNG_TISSUE}! The first 10 are:\",\n",
+    "    unique_cell_type_ontology_term_id[0:10].tolist(),\n",
+    ")\n",
+    "\n",
+    "# Use Pandas API to count, and grab 10 most common\n",
+    "top_10 = obs_df.cell_type_ontology_term_id.value_counts()[0:10]\n",
+    "print(f\"\\nTop 10 cell types where tissue_ontology_term_id == {LUNG_TISSUE}\")\n",
+    "print(top_10)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can also define much more complex value filters.
For example:\n",
+    "* combine terms with `and` and `or`\n",
+    "* use the `in` operator to query on multiple values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "CL:0000746    158188\n",
+       "CL:0008034     84750\n",
+       "CL:0002548     79618\n",
+       "CL:0000115     64114\n",
+       "CL:0002131     61830\n",
+       "CL:0000763     31318\n",
+       "CL:0000669     27104\n",
+       "CL:0000003     22650\n",
+       "CL:0000057     19380\n",
+       "CL:0002144     18050\n",
+       "Name: cell_type_ontology_term_id, dtype: int64"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# You can also do more complex queries, such as testing for inclusion in a list of values\n",
+    "\n",
+    "VENTRICLES = [\"UBERON:0002082\", \"UBERON:0002084\", \"UBERON:0002080\"]\n",
+    "\n",
+    "obs_df = human.obs.read_as_pandas_all(\n",
+    "    column_names=[\"cell_type_ontology_term_id\"],\n",
+    "    value_filter=f\"tissue_ontology_term_id in {VENTRICLES}\",\n",
+    ")\n",
+    "\n",
+    "# Use Pandas API to summarize\n",
+    "top_10 = obs_df.cell_type_ontology_term_id.value_counts()[0:10]\n",
+    "display(top_10)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Full census stats\n",
+    "\n",
+    "This example queries all organisms in the Census, and summarizes the diversity of various metadata labels."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Complete cell census contains 37525571 cells.\n",
+      "homo_sapiens\n",
+      "\tUnique cell_type_ontology_term_id values: 545\n",
+      "\tUnique assay_ontology_term_id values: 21\n",
+      "\tUnique tissue_ontology_term_id values: 178\n",
+      "mus_musculus\n",
+      "\tUnique cell_type_ontology_term_id values: 206\n",
+      "\tUnique assay_ontology_term_id values: 8\n",
+      "\tUnique tissue_ontology_term_id values: 40\n"
+     ]
+    }
+   ],
+   "source": [
+    "COLS_TO_QUERY = [\n",
+    "    \"cell_type_ontology_term_id\",\n",
+    "    \"assay_ontology_term_id\",\n",
+    "    \"tissue_ontology_term_id\",\n",
+    "]\n",
+    "\n",
+    "obs_df = {\n",
+    "    name: experiment.obs.read_as_pandas_all(column_names=COLS_TO_QUERY)\n",
+    "    for name, experiment in census[\"census_data\"].items()\n",
+    "}\n",
+    "\n",
+    "# Use Pandas API to summarize each organism\n",
+    "print(f\"Complete cell census contains {sum(len(df) for df in obs_df.values())} cells.\")\n",
+    "for organism, df in obs_df.items():\n",
+    "    print(organism)\n",
+    "    for col in COLS_TO_QUERY:\n",
+    "        print(f\"\\tUnique {col} values: {len(df[col].unique())}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.9.5 ('venv': venv)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.5"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "3da8ec1c162cd849e59e6ea2824b2e353dce799884e910aae99411be5277f953"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/api/python/notebooks/census_compute_over_X.ipynb b/api/python/notebooks/census_compute_over_X.ipynb
new file mode 100644
index 000000000..c9372c129
--- /dev/null
+++ b/api/python/notebooks/census_compute_over_X.ipynb
@@ -0,0 +1,764 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Computing on X: using online algorithms\n",
+    "\n",
+
"*Goal:* demonstrate larger-than-core computation on the X matrix, using \"online\" algorithms to process data incrementally.\n", + "\n", + "This notebook computes a variety of per-gene and per-cell statistics for a user-defined query.\n", + "\n", + "*NOTE*: when query results are small, it may be easier to use the SOMAExperment Query class to extract an AnnData, and then just compute over that. This notebook is showing means of incrementally processing larger-than-core (RAM) data, where incremental (online) algorithms are used.\n", + "\n", + "\n", + "First, open up part of the census." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import cell_census\n", + "from cell_census.experiment_query import experiment_query, AxisQuery\n", + "\n", + "census = cell_census.open_soma()\n", + "mouse = census[\"census_data\"][\"mus_musculus\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Simple incremental (aka \"online\") calculations\n", + "\n", + "Many statistics, such as `mean`, are easy to calculate incrementally. This cell demonstrates a query on the `X['raw']` sparse nD array, which will return results in batches. Accumulate the sum and count incrementally, into `raw_sum` and `raw_n`, and then compute mean.\n", + "\n", + "First define a query - in this case a slice over the obs axis for cells with a specific tissue & sex value, and all genes on the var axis. The `query.X()` method returns an iterator of results, each as a PyArrow Table. Each table will contain the sparse X data and obs/var coordinates, using standad SOMA names:\n", + "* `soma_data` - the X value (float32)\n", + "* `soma_dim_0` - the obs coordinate (int64)\n", + "* `soma_dim_1` - the var coordinate (int64)\n", + "\n", + "**Important**: the X matrices are joined to var/obs axis DataFrames by an integer join \"id\" (aka `soma_joinid`). They are *NOT* positionally indexed, and any given cell or gene may have a `soma_joinid` of any value (e.g., a large integer). In other words, for any given `X` value, the `soma_dim_0` corresponds to the `soma_joinid` in the `obs` dataframe, and the `soma_dim_` coordinate corresponds to the `soma_joinid` in the `var` dataframe.\n", + "\n", + "For convenience, the query package contains a utility function to simplify operations on query slices. `query.get_indexer()` returns an indexer that can be used to wrap the output of `query.X()`, converting from `soma_joinids` to positional indexing. Positions are `[0, N)`, where `N` are the number of results on the query for any given axis (equivalent to the Pandas `.iloc` of the axis dataframe).\n", + "\n", + "Key points:\n", + "* it is expensive to query and read the results - so rather than make multiple passes over the data, read it once and perform multiple computations.\n", + "* by default, data in the census is indexed by `soma_joinid` and not positionally. Use `query.get_indexer()` if you want positions." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
feature_idfeature_namefeature_lengthraw_nraw_mean
soma_joinid
0ENSMUSG000001096440610005C13Rik35831780.505840
1ENSMUSG000000077770610009B22Rik998463043.585939
2ENSMUSG000000867140610009E02Rik180300.000000
3ENSMUSG000000436440610009L18Rik61913707.246176
4ENSMUSG000000208310610010K14Rik1896922495.930902
..................
52368ENSMUSG00000109857Gm53058284600.000000
52369ENSMUSG000001185781700014B07Rik81800.000000
52370ENSMUSG00000118550Gm52965152400.000000
52371ENSMUSG00000117608Gm53018245500.000000
52372ENSMUSG00000118094Gm52988460400.000000
\n", + "

52373 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " feature_id feature_name feature_length raw_n \\\n", + "soma_joinid \n", + "0 ENSMUSG00000109644 0610005C13Rik 3583 178 \n", + "1 ENSMUSG00000007777 0610009B22Rik 998 4630 \n", + "2 ENSMUSG00000086714 0610009E02Rik 1803 0 \n", + "3 ENSMUSG00000043644 0610009L18Rik 619 1370 \n", + "4 ENSMUSG00000020831 0610010K14Rik 1896 9224 \n", + "... ... ... ... ... \n", + "52368 ENSMUSG00000109857 Gm53058 2846 0 \n", + "52369 ENSMUSG00000118578 1700014B07Rik 818 0 \n", + "52370 ENSMUSG00000118550 Gm52965 1524 0 \n", + "52371 ENSMUSG00000117608 Gm53018 2455 0 \n", + "52372 ENSMUSG00000118094 Gm52988 4604 0 \n", + "\n", + " raw_mean \n", + "soma_joinid \n", + "0 0.505840 \n", + "1 43.585939 \n", + "2 0.000000 \n", + "3 7.246176 \n", + "4 95.930902 \n", + "... ... \n", + "52368 0.000000 \n", + "52369 0.000000 \n", + "52370 0.000000 \n", + "52371 0.000000 \n", + "52372 0.000000 \n", + "\n", + "[52373 rows x 5 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with experiment_query(\n", + " mouse, measurement_name=\"RNA\", obs_query=AxisQuery(value_filter=\"tissue=='brain' and sex=='male'\")\n", + ") as query:\n", + " var_df = query.var().to_pandas().set_index(\"soma_joinid\")\n", + " n_vars = len(var_df)\n", + "\n", + " raw_n = np.zeros((n_vars,), dtype=np.int64) # accumulate number of non-zero X values\n", + " raw_sum = np.zeros((n_vars,), dtype=np.float64) # accumulate the sum of expression\n", + "\n", + " # query.X() returns an iterator of pyarrow.Table, with X data in COO format.\n", + " # You can request an indexer from the query that will map it to positional indices\n", + " indexer = query.get_indexer()\n", + " for arrow_tbl in query.X(\"raw\"):\n", + " var_dim = indexer.var_index(arrow_tbl[\"soma_dim_1\"])\n", + " data = arrow_tbl[\"soma_data\"]\n", + " np.add.at(raw_n, var_dim, 1)\n", + " np.add.at(raw_sum, var_dim, data)\n", + "\n", + "with np.errstate(divide=\"ignore\", invalid=\"ignore\"):\n", + " raw_mean = raw_sum / query.n_obs\n", + "raw_mean[np.isnan(raw_mean)] = 0\n", + "\n", + "var_df = var_df.assign(raw_n=pd.Series(data=raw_n, index=var_df.index))\n", + "var_df = var_df.assign(raw_mean=pd.Series(data=raw_mean, index=var_df.index))\n", + "\n", + "display(var_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Complex online calculations\n", + "\n", + "Other statistics are not as simple when implemented as an online algorithm. This cell demonstrates an implementation of an online computation of `variance`, using [Welford's online calculation of mean and variance](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm).\n", + "\n", + "This code is also available in the `cell_census.compute` module." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
feature_idfeature_namefeature_lengthraw_meanraw_variance
soma_joinid
0ENSMUSG000001096440610005C13Rik35830.505840440.270924
1ENSMUSG000000077770610009B22Rik99843.585939150808.450302
2ENSMUSG000000867140610009E02Rik18030.0000000.000000
3ENSMUSG000000436440610009L18Rik6197.2461768274.477431
4ENSMUSG000000208310610010K14Rik189695.930902296255.749040
..................
52368ENSMUSG00000109857Gm5305828460.0000000.000000
52369ENSMUSG000001185781700014B07Rik8180.0000000.000000
52370ENSMUSG00000118550Gm5296515240.0000000.000000
52371ENSMUSG00000117608Gm5301824550.0000000.000000
52372ENSMUSG00000118094Gm5298846040.0000000.000000
\n", + "

52373 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " feature_id feature_name feature_length raw_mean \\\n", + "soma_joinid \n", + "0 ENSMUSG00000109644 0610005C13Rik 3583 0.505840 \n", + "1 ENSMUSG00000007777 0610009B22Rik 998 43.585939 \n", + "2 ENSMUSG00000086714 0610009E02Rik 1803 0.000000 \n", + "3 ENSMUSG00000043644 0610009L18Rik 619 7.246176 \n", + "4 ENSMUSG00000020831 0610010K14Rik 1896 95.930902 \n", + "... ... ... ... ... \n", + "52368 ENSMUSG00000109857 Gm53058 2846 0.000000 \n", + "52369 ENSMUSG00000118578 1700014B07Rik 818 0.000000 \n", + "52370 ENSMUSG00000118550 Gm52965 1524 0.000000 \n", + "52371 ENSMUSG00000117608 Gm53018 2455 0.000000 \n", + "52372 ENSMUSG00000118094 Gm52988 4604 0.000000 \n", + "\n", + " raw_variance \n", + "soma_joinid \n", + "0 440.270924 \n", + "1 150808.450302 \n", + "2 0.000000 \n", + "3 8274.477431 \n", + "4 296255.749040 \n", + "... ... \n", + "52368 0.000000 \n", + "52369 0.000000 \n", + "52370 0.000000 \n", + "52371 0.000000 \n", + "52372 0.000000 \n", + "\n", + "[52373 rows x 5 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import numba\n", + "import numpy.typing as npt\n", + "\n", + "\n", + "class OnlineMatrixMeanVariance:\n", + " n_samples: int\n", + " n_variables: int\n", + "\n", + " def __init__(self, n_samples: int, n_variables: int):\n", + " \"\"\"\n", + " Compute mean and variance for n_variables over n_samples, encoded\n", + " in a COO format. Equivalent to:\n", + " numpy.mean(data, axis=0)\n", + " numpy.var(data, axix=0)\n", + " where the input `data` is of shape (n_samples, n_variables)\n", + " \"\"\"\n", + " self.n_samples = n_samples\n", + " self.n_variables = n_variables\n", + "\n", + " self.n_a = np.zeros((n_variables,), dtype=np.int32)\n", + " self.u_a = np.zeros((n_variables,), dtype=np.float64)\n", + " self.M2_a = np.zeros((n_variables,), dtype=np.float64)\n", + "\n", + " def update(self, coord_vec: npt.NDArray[np.int64], value_vec: npt.NDArray[np.float32]) -> None:\n", + " _mean_variance_update(coord_vec, value_vec, self.n_a, self.u_a, self.M2_a)\n", + "\n", + " def finalize(self) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:\n", + " \"\"\"\n", + " Returns tuple containing mean and variance\n", + " \"\"\"\n", + " u, M2 = _mean_variance_finalize(self.n_samples, self.n_a, self.u_a, self.M2_a)\n", + "\n", + " # compute sample variance\n", + " var = M2 / max(1, (self.n_samples - 1))\n", + "\n", + " return u, var\n", + "\n", + "\n", + "@numba.jit(nopython=True)\n", + "def _mean_variance_update(\n", + " col_arr: npt.NDArray[np.int64],\n", + " val_arr: npt.NDArray[np.float32],\n", + " n: npt.NDArray[np.int32],\n", + " u: npt.NDArray[np.float64],\n", + " M2: npt.NDArray[np.float64],\n", + "):\n", + " \"\"\"\n", + " Incrementally accumulate mean and sum of square of distance from mean using\n", + " Welford's online method.\n", + " \"\"\"\n", + " for col, val in zip(col_arr, val_arr):\n", + " u_prev = u[col]\n", + " M2_prev = M2[col]\n", + " n[col] += 1\n", + " u[col] = u_prev + (val - u_prev) / n[col]\n", + " M2[col] = M2_prev + (val - u_prev) * (val - u[col])\n", + "\n", + "\n", + "@numba.jit(nopython=True)\n", + "def _mean_variance_finalize(\n", + " n_samples: int, n_a: npt.NDArray[np.int32], u_a: npt.NDArray[np.float64], M2_a: npt.NDArray[np.float64]\n", + "):\n", + " \"\"\"\n", + " Finalize incremental values, acconting for missing elements (due to sparse input).\n", + " Non-sparse and sparse combined using Chan's parallel adaptation of Welford's.\n", + " The code assumes the sparse elements 
are all zero and ignores those terms.\n", + " \"\"\"\n", + " n_b = n_samples - n_a\n", + " delta = -u_a # assumes u_b == 0\n", + " u = (n_a * u_a) / n_samples\n", + " M2 = M2_a + delta**2 * n_a * n_b / n_samples # assumes M2_b == 0\n", + " return u, M2\n", + "\n", + "\n", + "with experiment_query(\n", + " mouse, measurement_name=\"RNA\", obs_query=AxisQuery(value_filter=\"tissue=='brain' and sex=='male'\")\n", + ") as query:\n", + " var_df = query.var().to_pandas().set_index(\"soma_joinid\")\n", + " n_vars = len(var_df)\n", + "\n", + " indexer = query.get_indexer()\n", + " mvn = OnlineMatrixMeanVariance(query.n_obs, n_vars)\n", + " for arrow_tbl in query.X(\"raw\"):\n", + " var_dim = indexer.var_index(arrow_tbl[\"soma_dim_1\"])\n", + " data = arrow_tbl[\"soma_data\"].to_numpy()\n", + " mvn.update(var_dim, data)\n", + "\n", + " u, v = mvn.finalize()\n", + "\n", + "var_df = var_df.assign(raw_mean=pd.Series(data=u, index=var_df.index))\n", + "var_df = var_df.assign(raw_variance=pd.Series(data=v, index=var_df.index))\n", + "\n", + "display(var_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## A more complex example - counting cells per feature, grouped by dataset_id\n", + "\n", + "This example demonstrates a more complex example where the goal is to count the number of cells per gene, grouped by cell dataset_id. The result is a Pandas DataFrame indexed by `obs.dataset_id` and `var.feature_id`, containing the number of cells per pair.\n", + "\n", + "This example does not use positional indexing, but rather demonstrates the use of Pandas DataFrame `join` to join on the `soma_joinid`." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
n_cellsfeature_name
dataset_idfeature_id
66ff82b4-9380-469c-bc4b-cfa08eacd325ENSMUSG00000109644500610005C13Rik
98e5ea9f-16d6-47ec-a529-686e76515e39ENSMUSG000001096441460610005C13Rik
c08f8441-4a10-4748-872a-e70c0bcccdbaENSMUSG00000109644960610005C13Rik
66ff82b4-9380-469c-bc4b-cfa08eacd325ENSMUSG0000000777716490610009B22Rik
98e5ea9f-16d6-47ec-a529-686e76515e39ENSMUSG0000000777737420610009B22Rik
............
66ff82b4-9380-469c-bc4b-cfa08eacd325ENSMUSG000001173102690Ptp4a1_ENSMUSG00000117310
98e5ea9f-16d6-47ec-a529-686e76515e39ENSMUSG000001173103759Ptp4a1_ENSMUSG00000117310
c08f8441-4a10-4748-872a-e70c0bcccdbaENSMUSG000001173101069Ptp4a1_ENSMUSG00000117310
66ff82b4-9380-469c-bc4b-cfa08eacd325ENSMUSG000000880251Rprl3
98e5ea9f-16d6-47ec-a529-686e76515e39ENSMUSG000000880251Rprl3
\n", + "

61938 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " n_cells \\\n", + "dataset_id feature_id \n", + "66ff82b4-9380-469c-bc4b-cfa08eacd325 ENSMUSG00000109644 50 \n", + "98e5ea9f-16d6-47ec-a529-686e76515e39 ENSMUSG00000109644 146 \n", + "c08f8441-4a10-4748-872a-e70c0bcccdba ENSMUSG00000109644 96 \n", + "66ff82b4-9380-469c-bc4b-cfa08eacd325 ENSMUSG00000007777 1649 \n", + "98e5ea9f-16d6-47ec-a529-686e76515e39 ENSMUSG00000007777 3742 \n", + "... ... \n", + "66ff82b4-9380-469c-bc4b-cfa08eacd325 ENSMUSG00000117310 2690 \n", + "98e5ea9f-16d6-47ec-a529-686e76515e39 ENSMUSG00000117310 3759 \n", + "c08f8441-4a10-4748-872a-e70c0bcccdba ENSMUSG00000117310 1069 \n", + "66ff82b4-9380-469c-bc4b-cfa08eacd325 ENSMUSG00000088025 1 \n", + "98e5ea9f-16d6-47ec-a529-686e76515e39 ENSMUSG00000088025 1 \n", + "\n", + " feature_name \n", + "dataset_id feature_id \n", + "66ff82b4-9380-469c-bc4b-cfa08eacd325 ENSMUSG00000109644 0610005C13Rik \n", + "98e5ea9f-16d6-47ec-a529-686e76515e39 ENSMUSG00000109644 0610005C13Rik \n", + "c08f8441-4a10-4748-872a-e70c0bcccdba ENSMUSG00000109644 0610005C13Rik \n", + "66ff82b4-9380-469c-bc4b-cfa08eacd325 ENSMUSG00000007777 0610009B22Rik \n", + "98e5ea9f-16d6-47ec-a529-686e76515e39 ENSMUSG00000007777 0610009B22Rik \n", + "... ... \n", + "66ff82b4-9380-469c-bc4b-cfa08eacd325 ENSMUSG00000117310 Ptp4a1_ENSMUSG00000117310 \n", + "98e5ea9f-16d6-47ec-a529-686e76515e39 ENSMUSG00000117310 Ptp4a1_ENSMUSG00000117310 \n", + "c08f8441-4a10-4748-872a-e70c0bcccdba ENSMUSG00000117310 Ptp4a1_ENSMUSG00000117310 \n", + "66ff82b4-9380-469c-bc4b-cfa08eacd325 ENSMUSG00000088025 Rprl3 \n", + "98e5ea9f-16d6-47ec-a529-686e76515e39 ENSMUSG00000088025 Rprl3 \n", + "\n", + "[61938 rows x 2 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from cell_census.experiment_query import X_as_series\n", + "\n", + "with experiment_query(\n", + " mouse,\n", + " measurement_name=\"RNA\",\n", + " obs_query=AxisQuery(value_filter=\"tissue=='brain'\"),\n", + ") as query:\n", + " obs_df = query.obs(column_names=[\"soma_joinid\", \"dataset_id\"]).to_pandas().set_index(\"soma_joinid\")\n", + " var_df = query.var().to_pandas().set_index(\"soma_joinid\")\n", + " n_cells_by_dataset = pd.Series(\n", + " 0,\n", + " index=pd.MultiIndex.from_product(\n", + " (var_df.index, obs_df.dataset_id.unique()), names=[\"soma_joinid\", \"dataset_id\"]\n", + " ),\n", + " dtype=np.int64,\n", + " name=\"n_cells\",\n", + " )\n", + "\n", + " for X_tbl in query.X(\"raw\"):\n", + " # Group by dataset_id and count unique (genes, dataset_id)\n", + " value_counts = (\n", + " X_as_series(X_tbl)\n", + " .to_frame()\n", + " .join(obs_df[[\"dataset_id\"]], on=\"soma_dim_0\")\n", + " .reset_index(level=1)\n", + " .drop(columns=[\"soma_data\"])\n", + " .value_counts()\n", + " )\n", + " np.add.at(n_cells_by_dataset, n_cells_by_dataset.index.get_indexer(value_counts.index), value_counts.to_numpy())\n", + "\n", + "# drop any combinations that are not observed\n", + "n_cells_by_dataset = n_cells_by_dataset[n_cells_by_dataset > 0]\n", + "\n", + "# and join with var_df to pick up feature_id and feature_name\n", + "n_cells_by_dataset = (\n", + " n_cells_by_dataset.to_frame()\n", + " .reset_index(level=1)\n", + " .join(var_df[[\"feature_id\", \"feature_name\"]])\n", + " .set_index([\"dataset_id\", \"feature_id\"])\n", + ")\n", + "\n", + "display(n_cells_by_dataset)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.5 ('venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": 
{ + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "3da8ec1c162cd849e59e6ea2824b2e353dce799884e910aae99411be5277f953" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/api/python/notebooks/census_dataset_presence.ipynb b/api/python/notebooks/census_dataset_presence.ipynb new file mode 100644 index 000000000..252138ab5 --- /dev/null +++ b/api/python/notebooks/census_dataset_presence.ipynb @@ -0,0 +1,697 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Census datasets presence\n", + "\n", + "*Goal:* demonstrate basic use of the `datasets_presence_matrix` array.\n", + "\n", + "The presence matrix is a sparse array, indicating which features (var) were present in each dataset. The array has dimensions [n_datasets, n_var], and is stored in the SOMA Measurement `varp` collection. The first dimension is indexed by the `soma_joinid` in the `census_datasets` dataframe. The second is indexed by the `soma_joinid` in the `var` dataframe of the measurement." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
soma_joinidcollection_idcollection_namecollection_doidataset_iddataset_titledataset_h5ad_pathdataset_total_cell_count
0003f821b4-87be-4ff4-b65a-b5fc00061da7Local and systemic responses to SARS-CoV-2 inf...10.1038/s41586-021-04345-xedc8d3fe-153c-4e3d-8be0-2108d30f8d70Airwayedc8d3fe-153c-4e3d-8be0-2108d30f8d70.h5ad236977
1103f821b4-87be-4ff4-b65a-b5fc00061da7Local and systemic responses to SARS-CoV-2 inf...10.1038/s41586-021-04345-x2a498ace-872a-4935-984b-1afa70fd9886PBMC2a498ace-872a-4935-984b-1afa70fd9886.h5ad422220
2243d4bb39-21af-4d05-b973-4c1fed7b916cTranscriptional Programming of Normal and Infl...10.1016/j.celrep.2018.09.006f512b8b6-369d-4a85-a695-116e0806857fSkinf512b8b6-369d-4a85-a695-116e0806857f.h5ad68036
330434a9d4-85fd-4554-b8e3-cf6c582bb2faAcute COVID-19 cohort across a range of WHO ca...10.1101/2020.11.20.20227355fa8605cf-f27e-44af-ac2a-476bee4410d3PBMCsfa8605cf-f27e-44af-ac2a-476bee4410d3.h5ad59506
443472f32d-4a33-48e2-aad5-666d4631bf4cA single-cell transcriptome atlas of the adult...10.15252/embj.2018100811d5c67a4e-a8d9-456d-a273-fa01adb1b308Retinad5c67a4e-a8d9-456d-a273-fa01adb1b308.h5ad19694
...........................
347347f70ebd97-b3bc-44fe-849d-c18e08fe773dA transcriptomic atlas of the mouse cerebellum...10.1101/2020.03.04.976407e0ed3c55-aff6-4bb7-b6ff-98a2d90b890cA transcriptomic atlas of the mouse cerebellume0ed3c55-aff6-4bb7-b6ff-98a2d90b890c.h5ad611034
3483485d445965-6f1a-4b68-ba3a-b8f765155d3aA molecular cell atlas of the human lung from ...10.1038/s41586-020-2922-4e04daea4-4412-45b5-989e-76a9be070a89Krasnow Lab Human Lung Cell Atlas, Smart-seq2e04daea4-4412-45b5-989e-76a9be070a89.h5ad9409
3493495d445965-6f1a-4b68-ba3a-b8f765155d3aA molecular cell atlas of the human lung from ...10.1038/s41586-020-2922-48c42cfd0-0b0a-46d5-910c-fc833d83c45eKrasnow Lab Human Lung Cell Atlas, 10X8c42cfd0-0b0a-46d5-910c-fc833d83c45e.h5ad65662
35035017481d16-ee44-49e5-bcf0-28c0780d8c4aSingle-Cell Sequencing of Developing Human Gut...10.1016/j.devcel.2020.11.0108e47ed12-c658-4252-b126-381df8d52a3dPaediatric Human Gut (4-14y)8e47ed12-c658-4252-b126-381df8d52a3d.h5ad22502
35135117481d16-ee44-49e5-bcf0-28c0780d8c4aSingle-Cell Sequencing of Developing Human Gut...10.1016/j.devcel.2020.11.010b46237d1-19c6-4af2-9335-9854634bad16Fetal Human Gut (6-11 PCW)b46237d1-19c6-4af2-9335-9854634bad16.h5ad62849
\n", + "

352 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " soma_joinid collection_id \\\n", + "0 0 03f821b4-87be-4ff4-b65a-b5fc00061da7 \n", + "1 1 03f821b4-87be-4ff4-b65a-b5fc00061da7 \n", + "2 2 43d4bb39-21af-4d05-b973-4c1fed7b916c \n", + "3 3 0434a9d4-85fd-4554-b8e3-cf6c582bb2fa \n", + "4 4 3472f32d-4a33-48e2-aad5-666d4631bf4c \n", + ".. ... ... \n", + "347 347 f70ebd97-b3bc-44fe-849d-c18e08fe773d \n", + "348 348 5d445965-6f1a-4b68-ba3a-b8f765155d3a \n", + "349 349 5d445965-6f1a-4b68-ba3a-b8f765155d3a \n", + "350 350 17481d16-ee44-49e5-bcf0-28c0780d8c4a \n", + "351 351 17481d16-ee44-49e5-bcf0-28c0780d8c4a \n", + "\n", + " collection_name \\\n", + "0 Local and systemic responses to SARS-CoV-2 inf... \n", + "1 Local and systemic responses to SARS-CoV-2 inf... \n", + "2 Transcriptional Programming of Normal and Infl... \n", + "3 Acute COVID-19 cohort across a range of WHO ca... \n", + "4 A single-cell transcriptome atlas of the adult... \n", + ".. ... \n", + "347 A transcriptomic atlas of the mouse cerebellum... \n", + "348 A molecular cell atlas of the human lung from ... \n", + "349 A molecular cell atlas of the human lung from ... \n", + "350 Single-Cell Sequencing of Developing Human Gut... \n", + "351 Single-Cell Sequencing of Developing Human Gut... \n", + "\n", + " collection_doi dataset_id \\\n", + "0 10.1038/s41586-021-04345-x edc8d3fe-153c-4e3d-8be0-2108d30f8d70 \n", + "1 10.1038/s41586-021-04345-x 2a498ace-872a-4935-984b-1afa70fd9886 \n", + "2 10.1016/j.celrep.2018.09.006 f512b8b6-369d-4a85-a695-116e0806857f \n", + "3 10.1101/2020.11.20.20227355 fa8605cf-f27e-44af-ac2a-476bee4410d3 \n", + "4 10.15252/embj.2018100811 d5c67a4e-a8d9-456d-a273-fa01adb1b308 \n", + ".. ... ... \n", + "347 10.1101/2020.03.04.976407 e0ed3c55-aff6-4bb7-b6ff-98a2d90b890c \n", + "348 10.1038/s41586-020-2922-4 e04daea4-4412-45b5-989e-76a9be070a89 \n", + "349 10.1038/s41586-020-2922-4 8c42cfd0-0b0a-46d5-910c-fc833d83c45e \n", + "350 10.1016/j.devcel.2020.11.010 8e47ed12-c658-4252-b126-381df8d52a3d \n", + "351 10.1016/j.devcel.2020.11.010 b46237d1-19c6-4af2-9335-9854634bad16 \n", + "\n", + " dataset_title \\\n", + "0 Airway \n", + "1 PBMC \n", + "2 Skin \n", + "3 PBMCs \n", + "4 Retina \n", + ".. ... \n", + "347 A transcriptomic atlas of the mouse cerebellum \n", + "348 Krasnow Lab Human Lung Cell Atlas, Smart-seq2 \n", + "349 Krasnow Lab Human Lung Cell Atlas, 10X \n", + "350 Paediatric Human Gut (4-14y) \n", + "351 Fetal Human Gut (6-11 PCW) \n", + "\n", + " dataset_h5ad_path dataset_total_cell_count \n", + "0 edc8d3fe-153c-4e3d-8be0-2108d30f8d70.h5ad 236977 \n", + "1 2a498ace-872a-4935-984b-1afa70fd9886.h5ad 422220 \n", + "2 f512b8b6-369d-4a85-a695-116e0806857f.h5ad 68036 \n", + "3 fa8605cf-f27e-44af-ac2a-476bee4410d3.h5ad 59506 \n", + "4 d5c67a4e-a8d9-456d-a273-fa01adb1b308.h5ad 19694 \n", + ".. ... ... 
\n", + "347 e0ed3c55-aff6-4bb7-b6ff-98a2d90b890c.h5ad 611034 \n", + "348 e04daea4-4412-45b5-989e-76a9be070a89.h5ad 9409 \n", + "349 8c42cfd0-0b0a-46d5-910c-fc833d83c45e.h5ad 65662 \n", + "350 8e47ed12-c658-4252-b126-381df8d52a3d.h5ad 22502 \n", + "351 b46237d1-19c6-4af2-9335-9854634bad16.h5ad 62849 \n", + "\n", + "[352 rows x 8 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import numpy as np\n", + "from scipy import sparse\n", + "import cell_census\n", + "\n", + "census = cell_census.open_soma()\n", + "\n", + "# Grab the experiment containing human data, and the measurement therein with RNA\n", + "human = census[\"census_data\"][\"homo_sapiens\"]\n", + "human_rna = human.ms[\"RNA\"]\n", + "\n", + "# The cell census-wide datasets\n", + "datasets_df = census[\"census_info\"][\"datasets\"].read_as_pandas_all()\n", + "display(datasets_df)\n", + "\n", + "# The human RNA presence matrix\n", + "presence = human.ms[\"RNA\"].varp[\"dataset_presence_matrix\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For convenience, read the entire presence matrix (for Homo sapiens) into a SciPy array:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<352x60564 sparse matrix of type ''\n", + "\twith 7220633 stored elements in Compressed Sparse Row format>" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Read the entire presence matrix. It may be returned in incremental chunks if larger than\n", + "# read buffers, so concatenate into a single scipy.sparse.sp_matrix.\n", + "\n", + "# TODO: TileDB-Py#501 when implemented, will simplify this\n", + "\n", + "arrow_sparse_tensors = [t for t in presence.read_sparse_tensor((slice(None),))]\n", + "flat_arrays = [t.to_numpy() for t in arrow_sparse_tensors]\n", + "data = np.concatenate(tuple(t[0] for t in flat_arrays))\n", + "coords = np.concatenate(tuple(t[1] for t in flat_arrays))\n", + "presence_matrix = sparse.coo_matrix(\n", + " (data.flatten(), (coords.T[0].flatten(), coords.T[1].flatten())), shape=presence.shape\n", + ").tocsr()\n", + "presence_matrix" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We also need the `var` dataframe, which is read into a Pandas DataFrame for convenient manipulation:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "var_df = human_rna.var.read_as_pandas_all()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Is a feature present in a dataset?\n", + "\n", + "*Goal:* test if a given feature is present in a given dataset.\n", + "\n", + "**Important:** the presence matrix is indexed by soma_joinid, and is *NOT* positionally indexed. In other words:\n", + "* the first dimension of the presence matrix is the dataset's `soma_joinid`, as stored in the `census_datasets` dataframe.\n", + "* the second dimension of the presence matrix is the feature's `soma_joinid`, as stored in the `var` dataframe." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Feature is present.\n" + ] + } + ], + "source": [ + "var_joinid = var_df.loc[var_df.feature_id == \"ENSG00000286096\"].soma_joinid\n", + "dataset_joinid = datasets_df.loc[datasets_df.dataset_id == \"97a17473-e2b1-4f31-a544-44a60773e2dd\"].soma_joinid\n", + "is_present = presence_matrix[dataset_joinid, var_joinid][0, 0]\n", + "print(f'Feature is {\"present\" if is_present else \"not present\"}.')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What datasets contain a feature?\n", + "\n", + "*Goal:* look up all datasets that have a feature_id present." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
soma_joinidcollection_idcollection_namecollection_doidataset_iddataset_titledataset_h5ad_pathdataset_total_cell_count
304304e5f58829-1a66-40b5-a624-9046778e74f5Tabula Sapiens10.1126/science.abl4896a68b64d8-aee3-4947-81b7-36b8fe5a44d2Tabula Sapiens - Stromala68b64d8-aee3-4947-81b7-36b8fe5a44d2.h5ad82478
305305e5f58829-1a66-40b5-a624-9046778e74f5Tabula Sapiens10.1126/science.abl489697a17473-e2b1-4f31-a544-44a60773e2ddTabula Sapiens - Epithelial97a17473-e2b1-4f31-a544-44a60773e2dd.h5ad104148
306306e5f58829-1a66-40b5-a624-9046778e74f5Tabula Sapiens10.1126/science.abl4896c5d88abe-f23a-45fa-a534-788985e93dadTabula Sapiens - Immunec5d88abe-f23a-45fa-a534-788985e93dad.h5ad264824
307307e5f58829-1a66-40b5-a624-9046778e74f5Tabula Sapiens10.1126/science.abl48965a11f879-d1ef-458a-910c-9b0bdfca5ebfTabula Sapiens - Endothelial5a11f879-d1ef-458a-910c-9b0bdfca5ebf.h5ad31691
308308e5f58829-1a66-40b5-a624-9046778e74f5Tabula Sapiens10.1126/science.abl489653d208b0-2cfd-4366-9866-c3c6114081bcTabula Sapiens - All Cells53d208b0-2cfd-4366-9866-c3c6114081bc.h5ad483152
\n", + "
" + ], + "text/plain": [ + " soma_joinid collection_id collection_name \\\n", + "304 304 e5f58829-1a66-40b5-a624-9046778e74f5 Tabula Sapiens \n", + "305 305 e5f58829-1a66-40b5-a624-9046778e74f5 Tabula Sapiens \n", + "306 306 e5f58829-1a66-40b5-a624-9046778e74f5 Tabula Sapiens \n", + "307 307 e5f58829-1a66-40b5-a624-9046778e74f5 Tabula Sapiens \n", + "308 308 e5f58829-1a66-40b5-a624-9046778e74f5 Tabula Sapiens \n", + "\n", + " collection_doi dataset_id \\\n", + "304 10.1126/science.abl4896 a68b64d8-aee3-4947-81b7-36b8fe5a44d2 \n", + "305 10.1126/science.abl4896 97a17473-e2b1-4f31-a544-44a60773e2dd \n", + "306 10.1126/science.abl4896 c5d88abe-f23a-45fa-a534-788985e93dad \n", + "307 10.1126/science.abl4896 5a11f879-d1ef-458a-910c-9b0bdfca5ebf \n", + "308 10.1126/science.abl4896 53d208b0-2cfd-4366-9866-c3c6114081bc \n", + "\n", + " dataset_title dataset_h5ad_path \\\n", + "304 Tabula Sapiens - Stromal a68b64d8-aee3-4947-81b7-36b8fe5a44d2.h5ad \n", + "305 Tabula Sapiens - Epithelial 97a17473-e2b1-4f31-a544-44a60773e2dd.h5ad \n", + "306 Tabula Sapiens - Immune c5d88abe-f23a-45fa-a534-788985e93dad.h5ad \n", + "307 Tabula Sapiens - Endothelial 5a11f879-d1ef-458a-910c-9b0bdfca5ebf.h5ad \n", + "308 Tabula Sapiens - All Cells 53d208b0-2cfd-4366-9866-c3c6114081bc.h5ad \n", + "\n", + " dataset_total_cell_count \n", + "304 82478 \n", + "305 104148 \n", + "306 264824 \n", + "307 31691 \n", + "308 483152 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Grab the feature's soma_joinid from the var dataframe\n", + "var_joinid = var_df.loc[var_df.feature_id == \"ENSG00000286096\"].soma_joinid\n", + "\n", + "# The presence matrix is indexed by the joinids of the dataset and var dataframes,\n", + "# so slice out the feature of interest by its joinid.\n", + "dataset_joinids = presence_matrix[:, var_joinid].tocoo().row\n", + "\n", + "# From the datasets dataframe, slice out the datasets which have a joinid in the list\n", + "datasets_df.loc[datasets_df.soma_joinid.isin(dataset_joinids)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What features are in a dataset?\n", + "\n", + "*Goal:* lookup the features present in a given dataset.\n", + "\n", + "This example also demonstrates the ability to do the query on multiple datasets." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
soma_joinidfeature_idfeature_namefeature_length
00ENSG00000121410A1BG3999
11ENSG00000268895A1BG-AS13374
22ENSG00000148584A1CF9603
33ENSG00000175899A2M6318
44ENSG00000245105A2M-AS12948
...............
4464444644ENSG00000219926OR7E104P4672
4464844648ENSG00000267104TBC1D3P1-DHX40P11841
4464944649ENSG00000265766CXADRP31955
4465144651ENSG00000267453CLEC4O774
4465444654ENSG00000279274RP11-533E23.275
\n", + "

27211 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " soma_joinid feature_id feature_name feature_length\n", + "0 0 ENSG00000121410 A1BG 3999\n", + "1 1 ENSG00000268895 A1BG-AS1 3374\n", + "2 2 ENSG00000148584 A1CF 9603\n", + "3 3 ENSG00000175899 A2M 6318\n", + "4 4 ENSG00000245105 A2M-AS1 2948\n", + "... ... ... ... ...\n", + "44644 44644 ENSG00000219926 OR7E104P 4672\n", + "44648 44648 ENSG00000267104 TBC1D3P1-DHX40P1 1841\n", + "44649 44649 ENSG00000265766 CXADRP3 1955\n", + "44651 44651 ENSG00000267453 CLEC4O 774\n", + "44654 44654 ENSG00000279274 RP11-533E23.2 75\n", + "\n", + "[27211 rows x 4 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Slice the dataset(s) of interest, and get the joinid(s)\n", + "dataset_joinids = datasets_df.loc[datasets_df.collection_id == \"17481d16-ee44-49e5-bcf0-28c0780d8c4a\"].soma_joinid\n", + "\n", + "# Slice the presence matrix by the first dimension, i.e., by dataset\n", + "var_joinids = presence_matrix[dataset_joinids, :].tocoo().col\n", + "\n", + "# From the feature (var) dataframe, slice out features which have a joinid in the list.\n", + "var_df.loc[var_df.soma_joinid.isin(var_joinids)]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.5 ('venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "3da8ec1c162cd849e59e6ea2824b2e353dce799884e910aae99411be5277f953" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/api/python/notebooks/census_datasets.ipynb b/api/python/notebooks/census_datasets.ipynb new file mode 100644 index 000000000..bb7f1bd72 --- /dev/null +++ b/api/python/notebooks/census_datasets.ipynb @@ -0,0 +1,530 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Census Datasets example\n", + "\n", + "*Goal:* demonstrate basic use of the `census_datasets` dataframe.\n", + "\n", + "Each Cell Census contains a top-level dataframe itemizing the datasets contained therein. You can read this into a Pandas DataFrame:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
collection_idcollection_namecollection_doidataset_iddataset_titledataset_h5ad_pathdataset_total_cell_count
soma_joinid
003f821b4-87be-4ff4-b65a-b5fc00061da7Local and systemic responses to SARS-CoV-2 inf...10.1038/s41586-021-04345-xedc8d3fe-153c-4e3d-8be0-2108d30f8d70Airwayedc8d3fe-153c-4e3d-8be0-2108d30f8d70.h5ad236977
103f821b4-87be-4ff4-b65a-b5fc00061da7Local and systemic responses to SARS-CoV-2 inf...10.1038/s41586-021-04345-x2a498ace-872a-4935-984b-1afa70fd9886PBMC2a498ace-872a-4935-984b-1afa70fd9886.h5ad422220
243d4bb39-21af-4d05-b973-4c1fed7b916cTranscriptional Programming of Normal and Infl...10.1016/j.celrep.2018.09.006f512b8b6-369d-4a85-a695-116e0806857fSkinf512b8b6-369d-4a85-a695-116e0806857f.h5ad68036
30434a9d4-85fd-4554-b8e3-cf6c582bb2faAcute COVID-19 cohort across a range of WHO ca...10.1101/2020.11.20.20227355fa8605cf-f27e-44af-ac2a-476bee4410d3PBMCsfa8605cf-f27e-44af-ac2a-476bee4410d3.h5ad59506
43472f32d-4a33-48e2-aad5-666d4631bf4cA single-cell transcriptome atlas of the adult...10.15252/embj.2018100811d5c67a4e-a8d9-456d-a273-fa01adb1b308Retinad5c67a4e-a8d9-456d-a273-fa01adb1b308.h5ad19694
........................
347f70ebd97-b3bc-44fe-849d-c18e08fe773dA transcriptomic atlas of the mouse cerebellum...10.1101/2020.03.04.976407e0ed3c55-aff6-4bb7-b6ff-98a2d90b890cA transcriptomic atlas of the mouse cerebellume0ed3c55-aff6-4bb7-b6ff-98a2d90b890c.h5ad611034
3485d445965-6f1a-4b68-ba3a-b8f765155d3aA molecular cell atlas of the human lung from ...10.1038/s41586-020-2922-4e04daea4-4412-45b5-989e-76a9be070a89Krasnow Lab Human Lung Cell Atlas, Smart-seq2e04daea4-4412-45b5-989e-76a9be070a89.h5ad9409
3495d445965-6f1a-4b68-ba3a-b8f765155d3aA molecular cell atlas of the human lung from ...10.1038/s41586-020-2922-48c42cfd0-0b0a-46d5-910c-fc833d83c45eKrasnow Lab Human Lung Cell Atlas, 10X8c42cfd0-0b0a-46d5-910c-fc833d83c45e.h5ad65662
35017481d16-ee44-49e5-bcf0-28c0780d8c4aSingle-Cell Sequencing of Developing Human Gut...10.1016/j.devcel.2020.11.0108e47ed12-c658-4252-b126-381df8d52a3dPaediatric Human Gut (4-14y)8e47ed12-c658-4252-b126-381df8d52a3d.h5ad22502
35117481d16-ee44-49e5-bcf0-28c0780d8c4aSingle-Cell Sequencing of Developing Human Gut...10.1016/j.devcel.2020.11.010b46237d1-19c6-4af2-9335-9854634bad16Fetal Human Gut (6-11 PCW)b46237d1-19c6-4af2-9335-9854634bad16.h5ad62849
\n", + "

352 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " collection_id \\\n", + "soma_joinid \n", + "0 03f821b4-87be-4ff4-b65a-b5fc00061da7 \n", + "1 03f821b4-87be-4ff4-b65a-b5fc00061da7 \n", + "2 43d4bb39-21af-4d05-b973-4c1fed7b916c \n", + "3 0434a9d4-85fd-4554-b8e3-cf6c582bb2fa \n", + "4 3472f32d-4a33-48e2-aad5-666d4631bf4c \n", + "... ... \n", + "347 f70ebd97-b3bc-44fe-849d-c18e08fe773d \n", + "348 5d445965-6f1a-4b68-ba3a-b8f765155d3a \n", + "349 5d445965-6f1a-4b68-ba3a-b8f765155d3a \n", + "350 17481d16-ee44-49e5-bcf0-28c0780d8c4a \n", + "351 17481d16-ee44-49e5-bcf0-28c0780d8c4a \n", + "\n", + " collection_name \\\n", + "soma_joinid \n", + "0 Local and systemic responses to SARS-CoV-2 inf... \n", + "1 Local and systemic responses to SARS-CoV-2 inf... \n", + "2 Transcriptional Programming of Normal and Infl... \n", + "3 Acute COVID-19 cohort across a range of WHO ca... \n", + "4 A single-cell transcriptome atlas of the adult... \n", + "... ... \n", + "347 A transcriptomic atlas of the mouse cerebellum... \n", + "348 A molecular cell atlas of the human lung from ... \n", + "349 A molecular cell atlas of the human lung from ... \n", + "350 Single-Cell Sequencing of Developing Human Gut... \n", + "351 Single-Cell Sequencing of Developing Human Gut... \n", + "\n", + " collection_doi \\\n", + "soma_joinid \n", + "0 10.1038/s41586-021-04345-x \n", + "1 10.1038/s41586-021-04345-x \n", + "2 10.1016/j.celrep.2018.09.006 \n", + "3 10.1101/2020.11.20.20227355 \n", + "4 10.15252/embj.2018100811 \n", + "... ... \n", + "347 10.1101/2020.03.04.976407 \n", + "348 10.1038/s41586-020-2922-4 \n", + "349 10.1038/s41586-020-2922-4 \n", + "350 10.1016/j.devcel.2020.11.010 \n", + "351 10.1016/j.devcel.2020.11.010 \n", + "\n", + " dataset_id \\\n", + "soma_joinid \n", + "0 edc8d3fe-153c-4e3d-8be0-2108d30f8d70 \n", + "1 2a498ace-872a-4935-984b-1afa70fd9886 \n", + "2 f512b8b6-369d-4a85-a695-116e0806857f \n", + "3 fa8605cf-f27e-44af-ac2a-476bee4410d3 \n", + "4 d5c67a4e-a8d9-456d-a273-fa01adb1b308 \n", + "... ... \n", + "347 e0ed3c55-aff6-4bb7-b6ff-98a2d90b890c \n", + "348 e04daea4-4412-45b5-989e-76a9be070a89 \n", + "349 8c42cfd0-0b0a-46d5-910c-fc833d83c45e \n", + "350 8e47ed12-c658-4252-b126-381df8d52a3d \n", + "351 b46237d1-19c6-4af2-9335-9854634bad16 \n", + "\n", + " dataset_title \\\n", + "soma_joinid \n", + "0 Airway \n", + "1 PBMC \n", + "2 Skin \n", + "3 PBMCs \n", + "4 Retina \n", + "... ... \n", + "347 A transcriptomic atlas of the mouse cerebellum \n", + "348 Krasnow Lab Human Lung Cell Atlas, Smart-seq2 \n", + "349 Krasnow Lab Human Lung Cell Atlas, 10X \n", + "350 Paediatric Human Gut (4-14y) \n", + "351 Fetal Human Gut (6-11 PCW) \n", + "\n", + " dataset_h5ad_path \\\n", + "soma_joinid \n", + "0 edc8d3fe-153c-4e3d-8be0-2108d30f8d70.h5ad \n", + "1 2a498ace-872a-4935-984b-1afa70fd9886.h5ad \n", + "2 f512b8b6-369d-4a85-a695-116e0806857f.h5ad \n", + "3 fa8605cf-f27e-44af-ac2a-476bee4410d3.h5ad \n", + "4 d5c67a4e-a8d9-456d-a273-fa01adb1b308.h5ad \n", + "... ... \n", + "347 e0ed3c55-aff6-4bb7-b6ff-98a2d90b890c.h5ad \n", + "348 e04daea4-4412-45b5-989e-76a9be070a89.h5ad \n", + "349 8c42cfd0-0b0a-46d5-910c-fc833d83c45e.h5ad \n", + "350 8e47ed12-c658-4252-b126-381df8d52a3d.h5ad \n", + "351 b46237d1-19c6-4af2-9335-9854634bad16.h5ad \n", + "\n", + " dataset_total_cell_count \n", + "soma_joinid \n", + "0 236977 \n", + "1 422220 \n", + "2 68036 \n", + "3 59506 \n", + "4 19694 \n", + "... ... 
\n", + "347 611034 \n", + "348 9409 \n", + "349 65662 \n", + "350 22502 \n", + "351 62849 \n", + "\n", + "[352 rows x 7 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import cell_census\n", + "from cell_census.experiment_query import experiment_query, AxisQuery\n", + "\n", + "census = cell_census.open_soma()\n", + "census_datasets = census[\"census_info\"][\"datasets\"].read_as_pandas_all()\n", + "\n", + "# for convenience, indexing on the soma_joinid which links this to other census data.\n", + "census_datasets = census_datasets.set_index(\"soma_joinid\")\n", + "census_datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The sum cells across all datasets should match the number of cells across all SOMA experiments (human, mouse)." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Count by experiment:\n", + "\t34115852 cells in homo_sapiens\n", + "\t3409719 cells in mus_musculus\n", + "\n", + "Found 37525571 cells in all experiments.\n", + "Found 37525571 cells in all datasets.\n" + ] + } + ], + "source": [ + "# Count cells across all experiments\n", + "all_experiments = (\n", + " (organism_name, organism_experiment) for organism_name, organism_experiment in census[\"census_data\"].items()\n", + ")\n", + "experiments_total_cells = 0\n", + "print(\"Count by experiment:\")\n", + "for organism_name, organism_experiment in all_experiments:\n", + " num_cells = len(organism_experiment.obs.read_as_pandas_all(column_names=[\"soma_joinid\"]))\n", + " print(f\"\\t{num_cells} cells in {organism_name}\")\n", + " experiments_total_cells += num_cells\n", + "\n", + "print(f\"\\nFound {experiments_total_cells} cells in all experiments.\")\n", + "\n", + "# Count cells across all datasets\n", + "print(f\"Found {census_datasets.dataset_total_cell_count.sum()} cells in all datasets.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Lets pick one dataset to slice out of the census, and turn into an [AnnData](https://anndata.readthedocs.io/en/latest/) in-memory object. This can be used with the [ScanPy](https://scanpy.readthedocs.io/en/stable/) toolchain. You can also save this AnnData locally using the AnnData [`write`](https://anndata.readthedocs.io/en/latest/api.html#writing) API." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
collection_idcollection_namecollection_doidataset_iddataset_titledataset_h5ad_pathdataset_total_cell_count
soma_joinid
1590b9d8a04-bb9d-44da-aa27-705bb65b54ebTabula Muris Senis10.1038/s41586-020-2496-10bd1a1de-3aee-40e0-b2ec-86c7a30c7149Bone marrow - A single-cell transcriptomic atl...0bd1a1de-3aee-40e0-b2ec-86c7a30c7149.h5ad40220
\n", + "
" + ], + "text/plain": [ + " collection_id collection_name \\\n", + "soma_joinid \n", + "159 0b9d8a04-bb9d-44da-aa27-705bb65b54eb Tabula Muris Senis \n", + "\n", + " collection_doi dataset_id \\\n", + "soma_joinid \n", + "159 10.1038/s41586-020-2496-1 0bd1a1de-3aee-40e0-b2ec-86c7a30c7149 \n", + "\n", + " dataset_title \\\n", + "soma_joinid \n", + "159 Bone marrow - A single-cell transcriptomic atl... \n", + "\n", + " dataset_h5ad_path \\\n", + "soma_joinid \n", + "159 0bd1a1de-3aee-40e0-b2ec-86c7a30c7149.h5ad \n", + "\n", + " dataset_total_cell_count \n", + "soma_joinid \n", + "159 40220 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "census_datasets[census_datasets.dataset_id == \"0bd1a1de-3aee-40e0-b2ec-86c7a30c7149\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a query on the mouse experiment, \"RNA\" measurement, for the dataset_id." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AnnData object with n_obs × n_vars = 40220 × 52373\n", + " obs: 'soma_joinid', 'dataset_id', 'assay', 'assay_ontology_term_id', 'cell_type', 'cell_type_ontology_term_id', 'development_stage', 'development_stage_ontology_term_id', 'disease', 'disease_ontology_term_id', 'donor_id', 'is_primary_data', 'self_reported_ethnicity', 'self_reported_ethnicity_ontology_term_id', 'sex', 'sex_ontology_term_id', 'suspension_type', 'tissue', 'tissue_ontology_term_id', 'tissue_general', 'tissue_general_ontology_term_id'\n", + " var: 'soma_joinid', 'feature_id', 'feature_name', 'feature_length'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mouse = census[\"census_data\"][\"mus_musculus\"]\n", + "with experiment_query(\n", + " mouse, \"RNA\", obs_query=AxisQuery(value_filter=\"dataset_id == '0bd1a1de-3aee-40e0-b2ec-86c7a30c7149'\")\n", + ") as query:\n", + " adata = query.read_as_anndata(\"raw\")\n", + "\n", + "adata" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also use the `cell_census.get_h5ad_uri()` API to fetch a URI pointing to the H5AD associated with this `dataset_id`. This is the same H5AD you can download from the CELLxGENE Portal, and may contain additional data-submittor provided information which was not included in the Cell Census.\n", + "\n", + "The \"locator\" returned by this API will include a `uri` and additional information that may be necessary to use the URI (eg, the S3 region).\n", + "\n", + "You will need to use a download API to fetch this H5AD, such as [`fsspec`](https://filesystem-spec.readthedocs.io/en/latest/)." 
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'uri': 's3://cellxgene-data-public/cell-census/2022-11-29/h5ads/0bd1a1de-3aee-40e0-b2ec-86c7a30c7149.h5ad',\n",
+ " 's3_region': 'us-west-2'}"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cell_census.get_source_h5ad_uri(\"0bd1a1de-3aee-40e0-b2ec-86c7a30c7149\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.9.5 ('venv': venv)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.5"
+ },
+ "orig_nbformat": 4,
+ "vscode": {
+ "interpreter": {
+ "hash": "3da8ec1c162cd849e59e6ea2824b2e353dce799884e910aae99411be5277f953"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+} diff --git a/api/python/notebooks/census_high_variable_genes.ipynb b/api/python/notebooks/census_high_variable_genes.ipynb new file mode 100644 index 000000000..97c50e1ca --- /dev/null +++ b/api/python/notebooks/census_high_variable_genes.ipynb @@ -0,0 +1,351 @@ +{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Computing on X: Highly Variable Genes\n",
+ "\n",
+ "*Goal:* demonstrate larger-than-core computation on X.\n",
+ "\n",
+ "This demo finds highly variable genes in a user-specified cell selection. It is similar to the [scanpy.pp.highly_variable_genes](https://scanpy.readthedocs.io/en/stable/generated/scanpy.pp.highly_variable_genes.html) function, when called with `flavor='seurat_v3'`.\n",
+ "\n",
+ "*NOTE*: when query results are small, it may be easier to use the SOMAExperiment Query class to extract an AnnData, and then just compute over that. This notebook demonstrates how to incrementally process larger-than-core (RAM) data using incremental (online) algorithms."
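,
+ "\n",
+ "As a sketch of the statistic computed below (the `seurat_v3` flavor): for each gene $j$ with mean $\\mu_j$ and loess-regularized standard deviation $\\sigma_j$, counts are clipped at $\\mu_j + \\sqrt{N}\\,\\sigma_j$, and the normalized variance is the variance of the standardized, clipped counts $c_{ij}$:\n",
+ "\n",
+ "$$\\mathrm{Var}_{norm}(j) = \\frac{1}{N-1} \\sum_i \\left( \\frac{c_{ij} - \\mu_j}{\\sigma_j} \\right)^2 = \\frac{N \\mu_j^2 + \\sum_i c_{ij}^2 - 2 \\mu_j \\sum_i c_{ij}}{(N-1)\\,\\sigma_j^2}\n",
+ "$$\n",
+ "\n",
+ "so the second pass over X only needs to accumulate $\\sum_i c_{ij}$ and $\\sum_i c_{ij}^2$ of the clipped values."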
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import cell_census\n",
+ "\n",
+ "census = cell_census.open_soma()\n",
+ "human = census[\"census_data\"][\"homo_sapiens\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "\n",
+ "from cell_census.experiment_query import ExperimentQuery\n",
+ "from cell_census.compute import OnlineMatrixMeanVariance\n",
+ "\n",
+ "\n",
+ "def highly_variable_genes(query: ExperimentQuery, n_top_genes: int = 10) -> pd.DataFrame:\n",
+ "    \"\"\"\n",
+ "    Acknowledgements: scanpy highly variable genes implementation, github.com/scverse/scanpy\n",
+ "    \"\"\"\n",
+ "\n",
+ "    try:\n",
+ "        import skmisc.loess\n",
+ "    except ImportError:\n",
+ "        raise ImportError(\"Please install the scikit-misc package via `pip install --user scikit-misc`\")\n",
+ "\n",
+ "    var_df = query.var(column_names=[\"soma_joinid\", \"feature_id\", \"feature_name\"]).to_pandas().set_index(\"soma_joinid\")\n",
+ "\n",
+ "    indexer = query.get_indexer()\n",
+ "    mvn = OnlineMatrixMeanVariance(query.n_obs, query.n_vars)\n",
+ "    for arrow_tbl in query.X(\"raw\"):\n",
+ "        var_dim = indexer.var_index(arrow_tbl[\"soma_dim_1\"])\n",
+ "        data = arrow_tbl[\"soma_data\"].to_numpy()\n",
+ "        mvn.update(var_dim, data)\n",
+ "\n",
+ "    u, v = mvn.finalize()\n",
+ "    var_df = var_df.assign(means=pd.Series(u, index=var_df.index), variances=pd.Series(v, index=var_df.index))\n",
+ "\n",
+ "    estimated_variances = np.zeros((len(var_df),), dtype=np.float64)\n",
+ "    not_const = v > 0\n",
+ "    y = np.log10(v[not_const])\n",
+ "    x = np.log10(u[not_const])\n",
+ "    model = skmisc.loess.loess(x, y, span=0.3, degree=2)\n",
+ "    model.fit()\n",
+ "    estimated_variances[not_const] = model.outputs.fitted_values\n",
+ "    reg_std = np.sqrt(10**estimated_variances)\n",
+ "\n",
+ "    # A second pass over the data is required because the clip value\n",
+ "    # is determined by the first pass\n",
+ "    N = query.n_obs\n",
+ "    vmax = np.sqrt(N)\n",
+ "    clip_val = reg_std * vmax + u\n",
+ "    counts_sum = np.zeros((query.n_vars,), dtype=np.float64)  # clipped\n",
+ "    squared_counts_sum = np.zeros((query.n_vars,), dtype=np.float64)  # clipped\n",
+ "    for arrow_tbl in query.X(\"raw\"):\n",
+ "        var_dim = indexer.var_index(arrow_tbl[\"soma_dim_1\"])\n",
+ "        data = arrow_tbl[\"soma_data\"].to_numpy()\n",
+ "        # clip\n",
+ "        mask = data > clip_val[var_dim]\n",
+ "        data = data.copy()\n",
+ "        data[mask] = clip_val[var_dim[mask]]\n",
+ "        np.add.at(counts_sum, var_dim, data)\n",
+ "        np.add.at(squared_counts_sum, var_dim, data**2)\n",
+ "\n",
+ "    norm_gene_vars = (1 / ((N - 1) * np.square(reg_std))) * (\n",
+ "        (N * np.square(u)) + squared_counts_sum - 2 * counts_sum * u\n",
+ "    )\n",
+ "    norm_gene_vars = norm_gene_vars.reshape(1, -1)\n",
+ "\n",
+ "    # argsort twice gives ranks, small rank means most variable\n",
+ "    ranked_norm_gene_vars = np.argsort(np.argsort(-norm_gene_vars, axis=1), axis=1)\n",
+ "\n",
+ "    # this is done in SelectIntegrationFeatures() in Seurat v3\n",
+ "    ranked_norm_gene_vars = ranked_norm_gene_vars.astype(np.float32)\n",
+ "    num_batches_high_var = np.sum((ranked_norm_gene_vars < n_top_genes).astype(int), axis=0)\n",
+ "    ranked_norm_gene_vars[ranked_norm_gene_vars >= n_top_genes] = np.nan\n",
+ "    ma_ranked = np.ma.masked_invalid(ranked_norm_gene_vars)\n",
+ "    median_ranked = np.ma.median(ma_ranked, axis=0).filled(np.nan)\n",
+ "\n",
+ "    var_df = var_df.assign(\n",
+ 
highly_variable_nbatches=pd.Series(num_batches_high_var, index=var_df.index),\n",
+ "        highly_variable_rank=pd.Series(median_ranked, index=var_df.index),\n",
+ "        variances_norm=pd.Series(np.mean(norm_gene_vars, axis=0), index=var_df.index),\n",
+ "    )\n",
+ "\n",
+ "    sorted_index = (\n",
+ "        var_df[[\"highly_variable_rank\", \"highly_variable_nbatches\"]]\n",
+ "        .sort_values(\n",
+ "            [\"highly_variable_rank\", \"highly_variable_nbatches\"],\n",
+ "            ascending=[True, False],\n",
+ "            na_position=\"last\",\n",
+ "        )\n",
+ "        .index\n",
+ "    )\n",
+ "    var_df[\"highly_variable\"] = False\n",
+ "    var_df = var_df.drop(columns=[\"highly_variable_nbatches\"])\n",
+ "    var_df.loc[sorted_index[: int(n_top_genes)], \"highly_variable\"] = True\n",
+ "    return var_df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To use this function, which is also available in `cell_census.compute`, open an ExperimentQuery and pass it to the function as a parameter."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
feature_idfeature_namemeansvarianceshighly_variable_rankvariances_normhighly_variable
soma_joinid
14173ENSG00000011465DCN1.4698092.799015e+049.019.943697True
17553ENSG00000234745HLA-B2.5932486.411800e+045.020.669138True
17949ENSG00000185885IFITM15.7268844.180694e+050.026.027514True
18020ENSG00000163453IGFBP79.2720801.508529e+061.025.324222True
22441ENSG00000111341MGP3.3343182.661684e+057.020.188246True
28680ENSG00000112562SMOC20.5455295.397097e+038.020.102556True
29507ENSG00000105467SYNGR40.8833613.483089e+046.020.313564True
40758ENSG00000253504MTCYBP195.5585383.219079e+052.024.388447True
42651ENSG00000270225MTCO2P228.8554299.335230e+053.023.784311True
52214ENSG00000232179MTATP6P296.4756137.000857e+054.021.696998True
\n", + "
" + ], + "text/plain": [ + " feature_id feature_name means variances \\\n", + "soma_joinid \n", + "14173 ENSG00000011465 DCN 1.469809 2.799015e+04 \n", + "17553 ENSG00000234745 HLA-B 2.593248 6.411800e+04 \n", + "17949 ENSG00000185885 IFITM1 5.726884 4.180694e+05 \n", + "18020 ENSG00000163453 IGFBP7 9.272080 1.508529e+06 \n", + "22441 ENSG00000111341 MGP 3.334318 2.661684e+05 \n", + "28680 ENSG00000112562 SMOC2 0.545529 5.397097e+03 \n", + "29507 ENSG00000105467 SYNGR4 0.883361 3.483089e+04 \n", + "40758 ENSG00000253504 MTCYBP19 5.558538 3.219079e+05 \n", + "42651 ENSG00000270225 MTCO2P22 8.855429 9.335230e+05 \n", + "52214 ENSG00000232179 MTATP6P29 6.475613 7.000857e+05 \n", + "\n", + " highly_variable_rank variances_norm highly_variable \n", + "soma_joinid \n", + "14173 9.0 19.943697 True \n", + "17553 5.0 20.669138 True \n", + "17949 0.0 26.027514 True \n", + "18020 1.0 25.324222 True \n", + "22441 7.0 20.188246 True \n", + "28680 8.0 20.102556 True \n", + "29507 6.0 20.313564 True \n", + "40758 2.0 24.388447 True \n", + "42651 3.0 23.784311 True \n", + "52214 4.0 21.696998 True " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from cell_census.experiment_query import experiment_query, AxisQuery\n", + "\n", + "with experiment_query(human, measurement_name=\"RNA\", obs_query=AxisQuery(value_filter=\"tissue == 'brain'\")) as query:\n", + " hvg = highly_variable_genes(query)\n", + "\n", + "display(hvg[hvg.highly_variable])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.5 ('venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "3da8ec1c162cd849e59e6ea2824b2e353dce799884e910aae99411be5277f953" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/api/python/notebooks/census_query_extract.ipynb b/api/python/notebooks/census_query_extract.ipynb new file mode 100644 index 000000000..5ad237a0a --- /dev/null +++ b/api/python/notebooks/census_query_extract.ipynb @@ -0,0 +1,142 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cell Census query & extract subsets\n", + "\n", + "_Goal:_ demonstrate the ability to query subsets of the Cell Census based upon user-defined obs/var metadata, and extract those slices into in-memory data structures for further analysis.\n", + "\n", + "**NOTE:** all examples in this notebook assume that sufficient memory exists on the host machine to store query results. There are other notebooks which provide examples for out-of-core processing." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import cell_census\n", + "\n", + "census = cell_census.open_soma(census_version=\"latest\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The Cell Census includes SOMA Experiments for both human and mouse. These experiments can be queried based upon metadata values (eg, tissue type), and the query result can be extracted into a variety of formats.\n", + "\n", + "> ⚠️ **NOTE:** The following is experimental query code. It is is built upon SOMA, but not (yet) part of SOMA. 
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "AnnData object with n_obs × n_vars = 119269 × 60564\n",
+ "    obs: 'soma_joinid', 'dataset_id', 'assay', 'assay_ontology_term_id', 'cell_type', 'cell_type_ontology_term_id', 'development_stage', 'development_stage_ontology_term_id', 'disease', 'disease_ontology_term_id', 'donor_id', 'is_primary_data', 'self_reported_ethnicity', 'self_reported_ethnicity_ontology_term_id', 'sex', 'sex_ontology_term_id', 'suspension_type', 'tissue', 'tissue_ontology_term_id', 'tissue_general', 'tissue_general_ontology_term_id'\n",
+ "    var: 'soma_joinid', 'feature_id', 'feature_name', 'feature_length'"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Define a simple obs-axis query: lung (UBERON:0002048) cells from female (PATO:0000383) donors, for two cell types.\n",
+ "\n",
+ "adata = cell_census.get_anndata(\n",
+ "    census,\n",
+ "    \"Homo sapiens\",\n",
+ "    obs_query={\n",
+ "        \"tissue_ontology_term_id\": \"UBERON:0002048\",\n",
+ "        \"sex_ontology_term_id\": \"PATO:0000383\",\n",
+ "        \"cell_type_ontology_term_id\": [\"CL:0002063\", \"CL:0000499\"],\n",
+ "    },\n",
+ ")\n",
+ "\n",
+ "display(adata)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "AnnData object with n_obs × n_vars = 41332 × 3\n",
+ "    obs: 'tissue', 'cell_type', 'sex'\n",
+ "    var: 'soma_joinid', 'feature_id', 'feature_name', 'feature_length'"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# You can also query on both axes. This example adds a var-axis query for a handful of genes, and queries the mouse experiment.\n",
+ "\n",
+ "adata = cell_census.get_anndata(\n",
+ "    census,\n",
+ "    \"Mus musculus\",\n",
+ "    obs_query={\"tissue\": \"brain\"},\n",
+ "    var_query={\"feature_name\": [\"Gm16259\", \"Dcaf5\", \"Gm53058\"]},\n",
+ "    column_names={\"obs\": [\"tissue\", \"cell_type\", \"sex\"]},\n",
+ ")\n",
+ "\n",
+ "display(adata)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.9.5 ('venv': venv)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.5"
+ },
+ "orig_nbformat": 4,
+ "vscode": {
+ "interpreter": {
+ "hash": "3da8ec1c162cd849e59e6ea2824b2e353dce799884e910aae99411be5277f953"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/api/python/notebooks/census_rank_gene_groups.ipynb b/api/python/notebooks/census_rank_gene_groups.ipynb
new file mode 100644
index 000000000..3e277579a
--- /dev/null
+++ b/api/python/notebooks/census_rank_gene_groups.ipynb
@@ -0,0 +1,140 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Cell Census - demo ScanPy rank_genes_groups\n",
+ "\n",
+ "_Goal_: demonstrate a simple Student's t-test between two medium-size (i.e., all of the extracted data fits into memory) \"obs\" metadata queries/slices, using [scanpy.tl.rank_genes_groups](https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.rank_genes_groups.html#scanpy-tl-rank-genes-groups).\n",
+ "\n",
+ "**NOTE:** all examples in this notebook assume that sufficient memory exists on the host machine to store query results. There are other notebooks which provide examples for out-of-core processing."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import cell_census\n",
+ "from cell_census.experiment_query import AxisQuery, experiment_query\n",
+ "\n",
+ "census = cell_census.open_soma(census_version=\"latest\")\n",
+ "human = census[\"census_data\"][\"homo_sapiens\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a first step, query and read a slice of data into an AnnData. In this example, lung cells (UBERON:0002048) labelled as fibroblasts (CL:0000057) and natural killer cells (CL:0000623) are selected.\n",
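+ "\n",
+ "The `value_filter` string below combines these criteria with the `and` / `in` operators."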
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AnnData object with n_obs × n_vars = 195477 × 60564\n", + " obs: 'cell_type_ontology_term_id', 'tissue_ontology_term_id'\n", + " var: 'soma_joinid', 'feature_id', 'feature_name', 'feature_length'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with experiment_query(\n", + " human,\n", + " \"RNA\",\n", + " obs_query=AxisQuery(\n", + " value_filter=\"tissue_ontology_term_id == 'UBERON:0002048' and cell_type_ontology_term_id in ['CL:0000057', 'CL:0000623']\"\n", + " ),\n", + ") as query:\n", + " adata = query.read_as_anndata(\n", + " X_name=\"raw\",\n", + " column_names={\n", + " \"obs\": [\n", + " \"cell_type_ontology_term_id\",\n", + " \"tissue_ontology_term_id\",\n", + " ],\n", + " \"var\": None,\n", + " },\n", + " )\n", + "\n", + "adata" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With the data read into an AnnData, use the ScanPy API to compute the rank genes groups." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAABCsAAAHHCAYAAACWddIPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAACpqUlEQVR4nOzdeVyN+fs/8Fe7tVJp1aZCFCVLWYrKGDIYSfbdMMMwMobsYx9jmIyPbabsMyTDGEOkGvsuhLKEoiRKGy3K/fujX+frqEzd56STXs/H4zw+p/u+z3Wuc47P3Pe5zvt9vZUEQRBARERERERERKQglKs6ASIiIiIiIiKit7FYQUREREREREQKhcUKIiIiIiIiIlIoLFYQERERERERkUJhsYKIiIiIiIiIFAqLFURERERERESkUFisICIiIiIiIiKFwmIFERERERERESkUFiuIiIiIiIiISKGwWEFERERERERECoXFCiIZxcXFYfz48WjcuDFq1aoFTU1NdOzYEQEBAcjJyZEcZ2FhgV69eol+njNnzqBTp06oU6cODA0NMXnyZGRnZ5c4Li8vDzNmzICxsTFq166N9u3bIywsTKFi/vvvv1BSUir1du7cOclxDx8+LPM4JSUljBs3riJv4Ufl0KFDWLBgQVWnQUREVYjXINLy8/OxdOlSNGvWDLVq1YKBgQG8vLzw+PFjyTEXL17EpEmT0KJFC9StWxdmZmYYMGAA7ty5UyLer7/+Cjc3NxgYGEBDQwOWlpYYNWoUHj58WM537uN069YtLFiwoMa/D1T5VKs6AaLq7J9//oGPjw80NDQwfPhw2NnZIT8/H6dOncL06dNx8+ZNbNq0SebnuXr1Kjw8PGBra4tVq1bh8ePHWLlyJe7evYvDhw9LHTty5EiEhITgm2++gY2NDbZs2YKePXsiMjISnTp1UpiYADB58mS0bdtWapu1tbXkfsOGDbF9+/YSjwsNDcXOnTvxySefVOyN/IgcOnQI//vf/1iwICKqoXgNIh3z9evX8PLywpkzZzBu3Di0bNkSL168wPnz55GRkYFGjRoBAH744QecPn0aPj4+aNmyJZKTk7F27Vq0bt0a586dg52dnSRmVFQULC0t0bt3bzRo0AAPHjzAr7/+ioMHD+LatWswNjaW+f2tjm7duoXvv/8eXbp0gYWFRVWnQx8zgYhEuX//vlCvXj2hWbNmQlJSUon9d+/eFX7++WfJ3+bm5oKXl5eo5+rRo4dgZGQkZGRkSLb9+uuvAgDhyJEjkm3nz58XAAg//vijZFtOTo5gZWUluLi4KEzMyMhIAYCwZ88eMW+H4OHhIWhqago5OTmiHv+hvX79WsjLy5NrzIkTJwr8TzgRUc3Ea5CSMX/44QdBTU1NOH/+/Htfz+nTp0uck+/cuSNoaGgIQ4YMee9jBUEQLl26JAAQli1b9p/HKoI3b94Ir169kmvMPXv2CACEyMhIucYlehevdIlEmjBhggBAOH36dLmOL8+FwsuXL4WYmBjh2bNnkm0ZGRmCqqqqMH36dKlj8/LyhHr16gljxoyRbJs+fbqgoqIidfIXBEFYunSpAEBISEhQiJhvFysyMzOF169fv/d9eVtSUpKgrKwsjBw58r3HJScnCyoqKsKCBQtK7IuNjRUACL/88osgCIKQn58vLFiwQLC2thY0NDQEHR0doWPHjsLRo0fLnVexBw8eSC6sVq9eLTRu3FhQVlYWoqKiBEEQhJiYGMHb21to0KCBoKGhITg5OQl//fWXVIz/ymfEiBECgBI3IiKqGXgNIh2zsLBQMDY2FgYMGCAIQtGPBC9fvnzv631X69athdatW//ncc+fPxcACDNmzHjvcS1atBC6dOlSYntxrt7e3pJtf/zxh9C6dWuhXr16Qv369QU7OzupYlNFFH/WoaGhgpOTk6ChoSGsXr1aEARBePHihTBlyhShUaNGgrq6umBlZSUsX75cKCwslIrxvnw2b95c6jUICxdUGdizgkikv//+G40bN0aHDh3kFvPChQuwtbXF2rVrJduio6NRUFCANm3aSB2rrq4OBwcHREVFSbZFRUWhSZMm0NTUlDq2Xbt2AIqGXSpCzGKjRo2CpqYmatWqha5du+LSpUtlvjfFdu3ahTdv3mDIkCHvPc7AwABubm4IDg4usW/37t1QUVGBj48PAGDBggX4/vvv0bVrV6xduxazZ8+GmZkZrly58p/5lGXz5s345Zdf8MUXX+Cnn36Cjo4Obt68CWdnZ8TExGDmzJn46aefULduXfTt2xf79u2TPPa/8hk/fjy6desGANi+fbvkRkRENQOvQa
Rj3rp1C0lJSWjZsiW++OIL1K1bF3Xr1kXLli0RGRn5n69dEAQ8ffoUenp6pe5PTU1FSkoKLl26hFGjRgEAPDw83hvT19cXJ06cQHJystT2U6dOISkpCQMHDgQAhIWFYdCgQWjQoAF++OEHLF++HF26dMHp06f/M++y3L59G4MGDUK3bt0QEBAABwcHvHr1Cm5ubtixYweGDx+ONWvWoGPHjvD394efn5/ksf+Vj6urKyZPngwAmDVrluQaxNbWVnS+RGVhzwoiETIzM5GYmIg+ffpU+nM9efIEAGBkZFRin5GREU6ePCl1bFnHAUBSUpJCxFRXV4e3tzd69uwJPT093Lp1CytXrkTnzp1x5swZODo6lohRbOfOnTAyMoK7u3uZxxTz9fXF+PHjcePGDak5qLt375Y0zAKK5v327NlTLnN7iz1+/Bj37t1Dw4YNJds8PT1hZmaGixcvQkNDAwDw1VdfoVOnTpgxYwY+//zzcuXj4uKCJk2aICwsDEOHDpVbzkREpPh4DVIy5t27dwEAq1evho6ODjZu3AgAWLp0KT799FNcvHgRLVu2LPN17ty5E4mJiVi4cGGp+01MTJCXlwcA0NXVxZo1ayQ/GpTF19cX8+bNQ0hICCZNmiTZvnv3btSrVw9eXl4Ais75mpqaOHLkCFRUVN4bs7zu3buH0NBQdO/eXbJt8eLFiIuLQ1RUFGxsbAAU/fhhbGyMH3/8EdOmTYOpqel/5tO4cWN07txZ8h506dJFLjkTlYYjK4hEyMzMBADUr19frnG7dOkCQRCkmiYWd/Mu/nL7tlq1akl1+87JySnzuLdjVXXMDh06ICQkBKNHj0bv3r0xc+ZMnDt3DkpKSvD39y/x+GJ37tzB5cuXMXDgQCgr//d/vvr16wdVVVXs3r1bsu3GjRu4desWfH19Jdu0tbVx8+ZNycWOPHh7e0sVKtLS0hAREYEBAwYgKysLz58/x/Pnz5Gamoru3bvj7t27SExMrLR8iIjo48BrkJIxi1cRycrKQnh4OEaOHImRI0fi2LFjEAQBK1asKPN1x8bGYuLEiXBxccGIESNKPebw4cM4dOgQfvrpJ5iZmeHly5dlxivWpEkTODg4SF2DFBYWIiQkBJ999hlq164NoOic//LlyzJXOBHD0tJSqlABAHv27EHnzp3RoEEDyTXI8+fP4enpicLCQpw4caLS8iESi8UKIhGKhyNmZWVV+nMVn8yKK/pvy83NlewvPras496OVdUxS2NtbY0+ffogMjIShYWFpR6zc+dOAPjPKSDF9PT04OHhITUVZPfu3VBVVUW/fv0k2xYuXIj09HQ0adIE9vb2mD59Oq5fv16u5yiLpaWl1N/37t2DIAiYO3cuGjZsKHWbP38+ACAlJaXS8iEioo8Dr0HKjtmxY0eYmppKjjMzM0OnTp1w5syZUl9fcnIyvLy8oKWlhZCQkDJHNnTt2hU9evSAn58f9uzZg++//15qukxZfH19cfr0acmPEf/++y9SUlKkfjD56quv0KRJE/To0QONGjXC6NGjERoa+p+x3+fdaxCgaPRJaGhoiWsQT09PAP93DVIZ+RCJxWIFkQiampowNjbGjRs3Kv25ioc6Fg+bfNuTJ0+kls0yMjIq8zgAkmOrOmZZTE1NkZ+fX+YvFr///juaNm0KJyen/4xVbODAgbhz545kXmtwcDA8PDyk5qW6uroiLi4OQUFBsLOzw2+//YbWrVvjt99+K/fzvOvd4sybN28AAN9++y3CwsJKvRUv21oZ+RAR0ceB1yAlYxb/b/H0zrfp6+vjxYsXJbZnZGSgR48eSE9PR2hoaLmXIbWysoKjo6PkB5T38fX1hSAI2LNnD4CiaxAtLS18+umnUvldvXoVBw4cQO/evREZGYkePXqUOcqjPEr7gejNmzfo1q1bmdcg3t7elZYPkVgsVhCJ1KtXL8TFxeHs2bOV+jx2dnZQVVUt0XwyPz8fV69ehYODg2Sbg4MD7ty5IxkiWuz8+fOS/YoQsyz3799HrVq1UK9evRL7zp8/j3v37pV7VEWxvn37Ql1dHbt378bVq1dx584dSVOrt+no6GDUqFH4448/8OjRI7Rs2VJqKKysGjduDABQU1ODp6dnqbe3h/T+Vz5KSkpyy42IiKoXXoNIx7S3t4eamppkBMPbkpKSpKZlAkUjMz777DPcuXMHBw8eRPPmzd//RrwjJycHGRkZ/3mcpaUl2rVrh927d6OgoAB//vkn+vbtW2Jqi7q6Oj777DOsW7cOcXFxGD9+PLZt24Z79+5VKK/3sbKyQnZ2dpnXIGZmZuXOh9cg9KGwWEEk0nfffYe6deti7NixePr0aYn9cXFxCAgIqFDMV69eITY2Fs+fP5ds09LSgqenJ3bs2CE15HP79u3Izs6WrGgBAP3790dhYaFUY8a8vDxs3rwZ7du3lwyNrOqYz549K/Har127hgMHDuCTTz4ptR/F77//DgAYPHjw+97CErS1tdG9e3cEBwdj165dUFdXR9++faWOSU1Nlfq7Xr16sLa2lhp6mpGRgdjY2HJdnJRGX18fXbp0wcaNG0v9lejt96Q8+dStWxcAkJ6eLiofIiKqvngNIh2zfv366NmzJ86cOYPY2FjJsTExMThz5oxUM8zCwkL4+vri7Nmz2LNnD1xcXEp9PwoKCkodkXHhwgVER0eXWM2kLL6+vjh37hyCgoLw/PlzqSkgQMlzvrKysqQZaPF5//Xr14iNjS31+qG8BgwYgLNnz+LIkSMl9qWnp6OgoKDc+fAahD4UJUEQhKpOgqi6OnDgAHx9fVG7dm0MHz4cdnZ2yM/Px5kzZ7Bnzx6MHDlS0pHawsICtWrVKnX1BkdHR3h5eeHff/9F165dMX/+fKlf0a9cuYIOHTqgefPm+OKLL/D48WP89NNPcHV1LXHSGTBgAPbt24epU6fC2toaW7duxYULFxAeHg5XV1eFiOnu7o7atWujQ4cO0NfXx61bt7Bp0yaoqanh7NmzJZa/KiwshImJCSwtLUX9irRz504MHToU9evXR5cuXXDgwAGp/QYGBujSpQucnJygo6ODS5cuYdOmTZg0aRLWrFkDANiyZQtGjRqFzZs3Y+TIkWU+18OHD2FpaYkff/wR3377rdS+W7duoVOnTlBWVsa4cePQuHFjPH36FGfPnsXjx49x7dq1cuezZ88eDBgwAMOGDUP37t2hoqJS6ogRIiL6OPEaRDrmrVu30L59e9SvX1+ytOaaNWtQUFCAqKgomJiYAAC++eYbBAQE4LPPPsOAAQNKvB/F71F6ejoaNWoEX19ftGjRAnXr1kV0dDQ2b96MWrVq4dy5c5JVNd7n8ePHMDMzQ7169aCmpobk5GSoqalJ9n/++edIS0uDu7s7GjVqhPj4ePzyyy+wsLDA5cuXoaysLLm2GDFiBLZs2fLe57OwsICdnR0OHjwotf3Vq1fo3Lkzrl+/jpEjR8LJyQkvX75EdHQ0QkJC8PDhQ+jp6ZUrn+TkZDRq1Aht27bFhAkToKGhAXd3d+jr6//n+0FUIQIRyeTOnTvCu
HHjBAsLC0FdXV2oX7++0LFjR+GXX34RcnNzJceZm5sLAEq9jRkzRhAEQYiMjBQACPPnzy/xPCdPnhQ6dOgg1KpVS2jYsKEwceJEITMzs8RxOTk5wrfffisYGhoKGhoaQtu2bYXQ0NBSc6+qmAEBAUK7du0EHR0dQVVVVTAyMhKGDh0q3L17t9SYoaGhAgBhzZo1pe7/L5mZmULt2rUFAMKOHTtK7F+8eLHQrl07QVtbW6hdu7bQrFkzYcmSJUJ+fr7kmM2bNwsAhM2bN7/3uR48eCAAEH788cdS98fFxQnDhw8XDA0NBTU1NcHExETo1auXEBISUqF8CgoKhK+//lpo2LChoKSkJPA/50RENQ+vQaRdvnxZ8PT0FOrWrSvUr19f6NOnj3Dnzh2pY9zc3Mp8L94+l+bl5QlTpkwRWrZsKWhqagpqamqCubm5MGbMGOHBgwelPn9ZOnbsKAAQxo4dW2JfSEiI8Mknnwj6+vqCurq6YGZmJowfP1548uSJ5Jjia4sRI0b853OZm5sLXl5epe7LysoS/P39BWtra0FdXV3Q09MTOnToIKxcuVJyjVGefARBEH799VehcePGgoqKigBAiIyMLP8bQlROHFlBRERERERERAqFPSuIiIiIiIiISKGwWEFERERERERECoXFCiIiIiIiIiJSKCxWEBEREREREZFCYbGCiIiIiIiIiBQKixVEREREREREpFBUqzoBRfDmzRskJSWhfv36UFJSqup0iIiIKpUgCMjKyoKxsTGUlfm7RVXiNQgREdU05b0OYbECQFJSEkxNTas6DSIiog/q0aNHaNSoUVWnUaPxGoSIiGqq/7oOYbECQP369QEUvVmamppVnA0REVHlyszMhKmpqeT8R1WH1yBERFTTlPc6hMUKQDLsUlNTkxcKRERUY3DaQdXjNQgREdVU/3UdwomqRERERERERKRQWKwgIiIiIiIiIoXCYgURERERERERKRQWK4iIiIiIiIhIobBYQUREREREREQKhcUKIiIiIiIiIlIoLFYQERERERERkUJhsYKIiIiIiIiIFAqLFURERERERESkUFisICIiIiIiIiKFwmIFERERERERESkUFiuIiIiIiIiISKGwWEFERERERERECoXFCiIiIiIiIiJSKCxWEBEREREREZFCYbGCiIiIiIiIiBQKixVEREREREREpFBYrCAiIiIiIiIihcJiBREREREREREpFBYriIiIiIiIiEihsFhBRERERERERAqlSosVhYWFmDt3LiwtLVG7dm1YWVlh0aJFEARBcowgCJg3bx6MjIxQu3ZteHp64u7du1Jx0tLSMGTIEGhqakJbWxtjxoxBdnb2h345REREVE3wGoSIiEixVWmx4ocffsD69euxdu1axMTE4IcffsCKFSvwyy+/SI5ZsWIF1qxZgw0bNuD8+fOoW7cuunfvjtzcXMkxQ4YMwc2bNxEWFoaDBw/ixIkT+OKLL6riJREREVE1wGsQIiIixaYkvP0TwgfWq1cvGBgYIDAwULLN29sbtWvXxo4dOyAIAoyNjTFt2jR8++23AICMjAwYGBhgy5YtGDhwIGJiYtC8eXNcvHgRbdq0AQCEhoaiZ8+eePz4MYyNjf8zj8zMTGhpaSEjIwOampqV82KJiIgUBM97vAYhIiKqKuU991XpyIoOHTogPDwcd+7cAQBcu3YNp06dQo8ePQAADx48QHJyMjw9PSWP0dLSQvv27XH27FkAwNmzZ6GtrS25SAAAT09PKCsr4/z586U+b15eHjIzM6VuREREVHPwGoSIiEixqVblk8+cOROZmZlo1qwZVFRUUFhYiCVLlmDIkCEAgOTkZACAgYGB1OMMDAwk+5KTk6Gvry+1X1VVFTo6OpJj3rVs2TJ8//338n45REREVE3wGoSIiEixVenIiuDgYOzcuRO///47rly5gq1bt2LlypXYunVrpT6vv78/MjIyJLdHjx5V6vMRERGRYuE1CBERkWKr0pEV06dPx8yZMzFw4EAAgL29PeLj47Fs2TKMGDEChoaGAICnT5/CyMhI8rinT5/CwcEBAGBoaIiUlBSpuAUFBUhLS5M8/l0aGhrQ0NCohFdERERE1QGvQYiIiBRblY6sePXqFZSVpVNQUVHBmzdvAACWlpYwNDREeHi4ZH9mZibOnz8PFxcXAICLiwvS09Nx+fJlyTERERF48+YN2rdv/wFeBREREVU3vAYhIiJSbFU6suKzzz7DkiVLYGZmhhYtWiAqKgqrVq3C6NGjAQBKSkr45ptvsHjxYtjY2MDS0hJz586FsbEx+vbtCwCwtbXFp59+inHjxmHDhg14/fo1Jk2ahIEDB5arCzcRERHVPLwGISIiUmxVunRpVlYW5s6di3379iElJQXGxsYYNGgQ5s2bB3V1dQCAIAiYP38+Nm3ahPT0dHTq1Anr1q1DkyZNJHHS0tIwadIk/P3331BWVoa3tzfWrFmDevXqlSsPLhtGREQ1Cc97vAYhIiKqKuU991VpsUJR8EKBiIhqEp73FAc/CyIiqmnKe+6r0p4VRERERERERETvYrGCiIiIiIiIiBQKixVEREREREREpFBYrCAiIiIiIiIihcJiBREREREREREpFBYriIiIiIiIiEihsFhBRERERERERAqFxQoiIiIiIiIiUigsVhARERERERGRQmGxgoiIiIiIiIgUCosVRERERERERKRQWKwgIiIiIiIiIoXCYgURERERERERKRQWK4iIiIiIiIhIobBYQUREREREREQKhcUKIiIiIiIiIlIoLFYQERERERERkUJhsYKIiIiIiIiIFIpqVSdAREREROK8ePEC+/btQ0JCAgDAzMwMffv2hY6OThVnRkREJBuOrCAiIiKqhvbu3YtmzZrh6NGjyMnJQU5ODo4cOYLmzZtj7969VZ0eERGRTDiygoiIiKgamj17Ns6fPw8LCwup7Q8ePECPHj3g7e1dNYkRERHJAUdWEBEREVVDhYWFJQoVAGBpaYmCgoIPnxAREZEcsVhBREREVA21bdsWo0ePxoULF/D06VM8ffoUFy5cwOjRo9GmTZuqTo+IiEgmLFYQERERVUOBgYGwtLTE6NGjYWNjAxsbG4wZMwbm5uYICgqq6vSIiIhkoiQIglDVSVS1zMxMaGlpISMjA5qamlWdDhERUaXieU9x8LMgIqKaprznPo6sICIiIqqmYmNjkZKSIrkfGBiICxcuVHFWREREsmOxgoiIiKga+vHHH+Hm5oY2bdpgx44d+OSTT3DkyBH4+PggICCgqtMjIiKSCZcuJSIiIqqGtmzZgtjYWGRnZ6NZs2a4ceMGLC0t8fz5c3Tp0gVTpkyp6hSJiIhEY7GCiIiIqBrS0NBAgwYN0KBBA+jp6cHS0hIAoKenBzU1tSrOjoiISDacBkJERERUDWloaOCff/7Bjh07oKSkhN27dwMAIiMjoaKiUsXZERERyYYjK4iIiIiqoTVr1uCLL76AsrIy/vrrLyxfvhwjRoxAvXr1EBwcXNXpERERyYTFCiIiIqJqqG3btoiKipL8/ccf
fyA1NRUNGjSAsjIHzxIRUfXGMxkRERHRR0JZWZmFCiIi+ijwbEZERERUDb29POmDBw/QokULGBsbw9LSEtHR0VWYGRERkexYrCAiIiKqhrZu3Sq5P2vWLHz11VfIycnBypUr4efnV4WZERERyY7FCiIiIqJq7tatW5g4cSIAwNvbG8+ePavijIiIiGTDBptERERE1VB6ejr+/vtvCIKA169fS+0TBKGKsiIiIpIPFiuIiIiIqiEzMzOsWrUKAGBgYIDExESYmJggJSUF6urqVZwdERGRbFisICIiIqqG/v3331K36+rq4vjx4x82GSIiIjljzwoiIiKij0BBQQGioqKQnZ2NOnXqVHU6REREMmGxgoiIiKgaioiIgK6uLvT09HD8+HF06NABgwcPhpWVFUdWEBFRtcdpIERERETVkL+/P8LDw5Geng5vb28EBwfD3d0dFy5cwLRp03Dy5MmqTpGIiEg0FiuIiIiIqqH8/Hw4ODgAALS1teHu7g4AaNeuHbKzs6swMyIiItlV6TQQCwsLKCkplbgVrxOem5uLiRMnQldXF/Xq1YO3tzeePn0qFSMhIQFeXl6oU6cO9PX1MX36dBQUFFTFyyEiIqJq4mO4Bnnz5o3kvo+Pj9S+wsLCD5YHERFRZajSYsXFixfx5MkTyS0sLAzA/51wp06dir///ht79uzB8ePHkZSUhH79+kkeX1hYCC8vL+Tn5+PMmTPYunUrtmzZgnnz5lXJ6yEiIqLq4WO4BnFyckJmZiYAYNmyZZLtcXFx0NTU/GB5EBERVQYlQRCEqk6i2DfffIODBw/i7t27yMzMRMOGDfH777+jf//+AIDY2FjY2tri7NmzcHZ2xuHDh9GrVy8kJSXBwMAAALBhwwbMmDEDz549K/ca45mZmdDS0kJGRgZP7kRE9NHjea+kj+kapKCgAAUFBahVq5Zc4hEREclTec99CrMaSH5+Pnbs2IHRo0dDSUkJly9fxuvXr+Hp6Sk5plmzZjAzM8PZs2cBAGfPnoW9vb3kIgEAunfvjszMTNy8ebPM58rLy0NmZqbUjYiIiGqm6noNcv/+fXTt2hWNGzeGn58fcnNzAQCqqqro2rWr6LhERESKQGGKFfv370d6ejpGjhwJAEhOToa6ujq0tbWljjMwMEBycrLkmLcvEor3F+8ry7Jly6ClpSW5mZqayu+FEBERUbVSXa9BvvzyS/Tv3x979uzB8+fP4eHhgaysLACQFC6IiIiqK4UpVgQGBqJHjx4wNjau9Ofy9/dHRkaG5Pbo0aNKf04iIiJSTNX1GiQlJQUTJ06Ek5MTtm3bBi8vL3h4eCAjIwNKSkpyzJqIiOjDU4ilS+Pj43Hs2DH8+eefkm2GhobIz89Henq61C8bT58+haGhoeSYCxcuSMUq7tRdfExpNDQ0oKGhIcdXQERERNVRdb4GycnJkfp71qxZUFdXlxphQUREVF0pxMiKzZs3Q19fH15eXpJtTk5OUFNTQ3h4uGTb7du3kZCQABcXFwCAi4sLoqOjkZKSIjkmLCwMmpqaaN68+Yd7AURERFQtVedrEFtbW4SGhkpt+/bbbzF48GDExcV9kByIiIgqS5WvBvLmzRtYWlpi0KBBWL58udS+L7/8EocOHcKWLVugqamJr7/+GgBw5swZAEXLhjk4OMDY2BgrVqxAcnIyhg0bhrFjx2Lp0qXlzoFd0YmIqCbhea9Idb8GycvLA4BSR2okJibCxMSkQvGIiIg+hPKe+6p8GsixY8eQkJCA0aNHl9i3evVqKCsrw9vbG3l5eejevTvWrVsn2a+iooKDBw/iyy+/hIuLC+rWrYsRI0Zg4cKFH/IlEBERUTVU3a9B3jedhIUKIiKq7qp8ZIUi4C9MRERUk/C8pzj4WRARUU1T3nOfQvSsICIiIiIiIiIqxmIFERERERERESkUFiuIiIiIiIiISKGwWEFERERERERECoXFCiIiIiIiIiJSKCxWEBEREREREZFCYbGCiIiIiIiIiBQKixVEREREREREpFBYrCAiIiIiIiIihcJiBREREREREREpFBYriIiIiIiIiEihsFhBRERERERERAqFxQoiIiIiIiIiUigsVhARERERERGRQmGxgoiIiIiIiIgUCosVRERERERERKRQWKwgIiIiIiIiIoXCYgURERERERERKRQWK4iIiIiIiIhIobBYQUREREREREQKhcUKIiIiIiIiIlIoqlWdABEREREplsLCQhw/fhwJCQkAADMzM7i5uUFFRaWKMyMiopqCxQoiIiIikjh58iQGDx4MExMTmJubAwAePnyIpKQk7Ny5E66urlWcIRER1QQsVhARERGRxMSJE7Fv3z60adNGavvFixcxevRoREdHV1FmRERUk7BnBRERERFJ5ObmlihUAEDbtm2Rl5dXBRkREVFNxGIFEREREUlYWVlh4cKFSElJkWxLSUnB999/D0tLyyrMjIiIahIWK4iIiIhIYtu2bYiPj4eVlRVq166N2rVrw8rKCvHx8di+fXtVp0dERDUEe1YQERERkUTDhg0RGBiIwMBApKWlAQB0dHSqOCsiIqppOLKCiIiIiEqlo6MjVahITU2twmyIiKgmYbGCiIiIiMrF0dGxqlMgIqIagtNAiIiIiEjiwIEDZe7Lzc39gJkQEVFNxmIFEREREUl8/vnncHNzgyAIJfZlZWVVQUZERFQTsVhBRERERBI2NjYICgqChYVFiX2mpqYfPiEiIqqR2LOCiIiIiCRGjBiB58+fl7pvwoQJHzgbIiKqqZSE0sb41TCZmZnQ0tJCRkYGNDU1qzodIiKiSsXznuLgZ0FERDVNec99HFlBRERERFJevnyJgoICAEBaWhrCw8Px+PHjKs6KiIhqEhYriIiIiEhi27Zt0NPTg6WlJSIiImBnZwd/f384ODhg9+7dVZ0eERHVEGywSUREREQSK1euRGxsLDIyMuDq6opjx46hTZs2uHfvHry9veHr61vVKRIRUQ3AYgURERERSaioqMDc3BwAoK2tjTZt2gAArK2toazMQblERPRh8IxDRERERBLKysq4efMmTp06hZcvX+L06dMAgNjYWBQWFlZxdkREVFNwZAURERERSSxatAiurq5QVlbGrl27MGfOHCQlJSE5ORmbNm2q6vSIiKiGYLGCiIiIiCR69uyJ1NRUyd9dunTB1atXYWpqCn19/SrMjIiIapIqnwaSmJiIoUOHQldXF7Vr14a9vT0uXbok2S8IAubNmwcjIyPUrl0bnp6euHv3rlSMtLQ0DBkyBJqamtDW1saYMWOQnZ39oV8KERERVSO8BimftLQ0ZGRkcAoIERF9UFVarHjx4gU6duwINTU1HD58GLdu3cJPP/2EBg0aSI5ZsWIF1qxZgw0bNuD8+fOoW7cuunfvjtzcXMkxQ4YMwc2bNxEWFoaDBw/ixIkT+OKLL6riJREREVE1wGuQsg0fPhwpKSkAgIiICDRv3hwzZ85Eq1atsH///qpNjoiIagwlQRCEqnrymTNn4vTp0zh58mSp+wVBgLGxMaZNm4Zvv/0WAJC
RkQEDAwNs2bIFAwcORExMDJo3b46LFy9KulWHhoaiZ8+eePz4MYyNjf8zj8zMTGhpaSEjIwOamprye4FEREQKiOc9XoO8T6tWrXDt2jUAgJubGwICAuDg4IAHDx6gX79+iIqKquIMiYioOivvua9KR1YcOHAAbdq0gY+PD/T19eHo6Ihff/1Vsv/BgwdITk6Gp6enZJuWlhbat2+Ps2fPAgDOnj0rtawWAHh6ekJZWRnnz5//cC+GiIiIqg1eg5QtJydHcv/Vq1dwcHAAAFhaWnIqCBERfTBVWqy4f/8+1q9fDxsbGxw5cgRffvklJk+ejK1btwIAkpOTAQAGBgZSjzMwMJDsS05OLtHsSVVVFTo6OpJj3pWXl4fMzEypGxEREdUcvAYpW/fu3TFlyhRkZ2fD09MTO3fuhCAIOHz4MPT09Ko6PSIiqiGqtFjx5s0btG7dGkuXLoWjoyO++OILjBs3Dhs2bKjU5122bBm0tLQkN1NT00p9PiIiIlIsvAYp208//QRlZWWYmJhg165dGDZsGNTV1REQEIDAwMCqTo+IiGqIKi1WGBkZoXnz5lLbbG1tkZCQAAAwNDQEADx9+lTqmKdPn0r2GRoaSppAFSsoKEBaWprkmHf5+/sjIyNDcnv06JFcXg8RERFVD7wGKZu6ujpWr16NJ0+e4MCBA7h8+TKSk5MRGhoKS0vLqk6PiIhqiCotVnTs2BG3b9+W2nbnzh2Ym5sDKJobaWhoiPDwcMn+zMxMnD9/Hi4uLgAAFxcXpKen4/Lly5JjIiIi8ObNG7Rv377U59XQ0ICmpqbUjYiIiGoOXoP8tzp16sDe3h6Ojo7Q1dUFADRp0qSKsyIioppCtSqffOrUqejQoQOWLl2KAQMG4MKFC9i0aRM2bdoEAFBSUsI333yDxYsXw8bGBpaWlpg7dy6MjY3Rt29fAEW/gnz66aeSoZuvX7/GpEmTMHDgwHJ14SYiIqKah9cgZbt+/XqZ+7Kysj5gJkREVJNV6dKlAHDw4EH4+/vj7t27sLS0hJ+fH8aNGyfZLwgC5s+fj02bNiE9PR2dOnXCunXrpCr7aWlpmDRpEv7++28oKyvD29sba9asQb169cqVgyIuG0ZERFRZeN4rwmuQ0ikrK8PCwgKlXSImJiYiPz+/CrIiIqKPRXnPfVVerFAEinihQEREVFl43lMcivhZWFpa4vTp06WODjE1NVXIPhtERFR9lPfcV6U9K4iIiIhIsfTu3Rv3798vdZ+Xl9cHzoaIiGqqKu1ZQURERESKJSAgoMx9lb20KxERUTGOrCAiIiIiIiIihcJiBREREREREREpFBYriIiIiIiIiEihsFhBRERERERERAqFxQoiIiIiIiIiUigsVhARERERERGRQmGxgoiIiIiIiIgUCosVRERERERERKRQWKwgIiIiIiIiIoXCYgURERERERERKRQWK4iIiIiIiIhIobBYQUREREREREQKhcUKIiIiIiIiIlIoLFYQERERERERkUJhsYKIiIiIiIiIFAqLFURERERERESkUFisICIiIiIiIiKFwmIFERERERERESkUFiuIiIiIiIiISKGwWEFERERERERECoXFCiIiIiIiIiJSKCxWEBEREREREZFCYbGCiIiIiIiIiBQKixVEREREREREpFBYrCAiIiIiIiIihSK6WJGeno7ffvsN/v7+SEtLAwBcuXIFiYmJckuOiIiIiIiIiGoeVTEPun79Ojw9PaGlpYWHDx9i3Lhx0NHRwZ9//omEhARs27ZN3nkSERERERERUQ0hamSFn58fRo4cibt376JWrVqS7T179sSJEyfklhwRERERERER1TyiihUXL17E+PHjS2w3MTFBcnKyzEkRERERERERUc0lqlihoaGBzMzMEtvv3LmDhg0bypwUEREREREREdVcoooVvXv3xsKFC/H69WsAgJKSEhISEjBjxgx4e3vLNUEiIiIiIiIiqllEFSt++uknZGdnQ19fHzk5OXBzc4O1tTXq16+PJUuWyDtHIiIiIvpIXbt2rapTICIiBSRqNRAtLS2EhYXh9OnTuHbtGrKzs9G6dWt4enrKOz8iIiIi+oh99tlnSEhIqOo0iIhIwVS4WPH69WvUrl0bV69eRceOHdGxY8fKyIuIiIiIPhJr1qwpdbsgCMjOzq6U5zx48CB69epVKbGJiKjyVbhYoaamBjMzMxQWFlZGPkRERET0kZk2bRqGDBkCJSWlEvuKe6DJ21dffcViBRFRNSZqGsjs2bMxa9YsbN++HTo6OvLOiYiIiIg+Ira2tvD390fTpk1L7Dt27JjouH5+fqVuFwQBGRkZouMSEVHVE1WsWLt2Le7duwdjY2OYm5ujbt26UvuvXLkil+SIiIiIqPqbOnUq8vPzS923ePFi0XHXrVuH7777DioqKiX2lTaKg4iIqg9RxYq+ffvKOQ0iIiIi+liNGjWqzH0jRowQHdfOzg4+Pj6wt7cvse+3334THZeIiKqeqGLF/Pnz5Z0HEREREdUQr169wu3bt2FtbY369euLjrNw4ULUrl271H1bt24VHfdtBQUFiI6ORuPGjaGlpSWXmERE9N+UZXnw5cuXsWPHDuzYsQNRUVHyyomIiIiIPiIzZsyQ3L927Rqsra0xaNAgWFlZ4cSJE6Lj9uzZE9bW1qXuc3d3FxUzIiICurq60NPTw/Hjx9GhQwcMHjwYVlZWOH78uOhciYioYkQVK1JSUuDu7o62bdti8uTJmDx5MpycnODh4YFnz57JO0ciIiIiqsbCwsIk9+fOnYt169YhNjYWBw4cgL+/v0yxc3JykJCQUGL7zZs3RcXz9/dHeHg4QkJC4O3tjeXLlyMmJgaHDh3CnDlzZMqViIjKT1Sx4uuvv0ZWVhZu3ryJtLQ0pKWl4caNG8jMzMTkyZPLHWfBggVQUlKSujVr1kyyPzc3FxMnToSuri7q1asHb29vPH36VCpGQkICvLy8UKdOHejr62P69OkoKCgQ87KIiIiohuA1SNVJSEiQ9D9zdnbGq1evRMc6evQojIyMYG9vj9atW+PevXuSfcOGDRMVMz8/Hw4ODujSpQu0tbUlIzTatWuH7Oxs0bkSEVHFiOpZERoaimPHjsHW1layrXnz5vjf//6HTz75pEKxWrRoIbVklarq/6U0depU/PPPP9izZw+0tLQwadIk9OvXD6dPnwYAFBYWwsvLC4aGhjhz5gyePHmC4cOHQ01NDUuXLhXz0oiIiKiG4DXIh5OSkoI1a9ZAEARkZWVJ7Xvz5o3ouHPmzMGJEydgb2+PoKAgeHp64uDBg7Czs4MgCKJivp2Pj4+P1L7CwkLRuRIRUcWIKla8efMGampqJbarqalV+ISjqqoKQ0PDEtszMjIQGBiI33//XVLR3rx5M2xtbXHu3Dk4Ozvj6NGjuHXrFo4dOwYDAwM4ODhg0aJFmDFjBhYsWAB1dXUxL4+IiIhqAF6DfDjdunWT9DdzdXXFkydPYGRkhMTEROjr64uO+/r1a7Rs2RIAMGbMGFhYWKBXr17466+/RC9d6uTkhMzMTG
hqamLZsmWS7XFxcdDU1BSdKxERVYyoaSDu7u6YMmUKkpKSJNsSExMxdepUeHh4VCjW3bt3YWxsjMaNG2PIkCGSOYeXL1/G69ev4enpKTm2WbNmMDMzw9mzZwEAZ8+ehb29PQwMDCTHdO/eHZmZmaLnKRIREVHNwGuQD2fz5s1SNyMjIwCAiYmJVD+LisrNzUVeXp7kbw8PD2zduhW9e/fGkydPRMUMCgoqtShhbm4uU65ERFQxoooVa9euRWZmJiwsLGBlZQUrKytYWloiMzMTv/zyS7njtG/fHlu2bEFoaCjWr1+PBw8eoHPnzsjKykJycjLU1dWhra0t9RgDAwMkJycDAJKTk6UuEor3F+8rS15eHjIzM6VuREREVHPwGuTDys/Pl5qWcejQIcybNw979+6VKW6/fv3w77//Sm1zc3PD9u3bZVoStTSqqqoy9dcgIqKKETUNxNTUFFeuXMGxY8cQGxsLALC1tZX6BaI8evToIbnfsmVLtG/fHubm5ggODi5zzWx5WLZsGb7//vtKi09ERESKjdcgH1b79u1x7Ngx6Orq4pdffsHGjRvRs2dP/PDDD7hx4wbmz58vKu6SJUtK3e7q6oo7d+7IknKpHB0dS115hIiI5E9UsQIAlJSU0K1bN3Tr1k1uyWhra6NJkya4d+8eunXrhvz8fKSnp0v9svH06VPJ/FJDQ0NcuHBBKkZxp+7S5qAW8/f3h5+fn+TvzMxMmJqayu11EBERUfXCa5DKVVhYCF1dXQDA9u3bcfz4cejq6iInJwft2rUTXawo9ubNGygrSw8YfvHiBRo0aFDhWAcOHChzX25uboXjFXvw4AEePXqEtm3bShXEwsLC5Ho9TUT0sRBVrJg8eTKsra1LLFO6du1a3Lt3Dz///LOoZLKzsxEXF4dhw4bByckJampqCA8Ph7e3NwDg9u3bSEhIgIuLCwDAxcUFS5YsQUpKiqQ5U1hYGDQ1NdG8efMyn0dDQwMaGhqiciQiIqKPD69BKldBQQGys7NRr149qKurQ0dHBwBQu3ZtmVYDuXTpEnx8fJCUlISePXti06ZNaNiwIYCi/hVXrlypcMzPP/8cbm5upa4m8u5KJuW1c+dOfPPNNzA0NER6ejqCg4Ml/5ZmzJghU7EiJiYGf/zxh2TEh5mZGXx9fdGiRQvRMYmIFIGonhV79+5Fx44dS2zv0KEDQkJCyh3n22+/xfHjx/Hw4UOcOXMGn3/+OVRUVDBo0CBoaWlhzJgx8PPzQ2RkJC5fvoxRo0bBxcUFzs7OAIBPPvkEzZs3x7Bhw3Dt2jUcOXIEc+bMwcSJE2vkhQARERGVD69BPqxJkyahR48eOHbsGLy8vDBhwgT8+++/mDFjBtq2bSs67tSpU7F27VokJSXBzs4Orq6uSExMBADRS5fa2NggKCgIkZGRJW56enqiYv7444+IiopCdHQ0Nm/eDF9fX0RERMiUJwD873//Q48ePZCXl4f27dujffv2yMvLg5eXF9auXSs6LhGRQhBE0NDQEO7evVti+927dwUNDY1yx/H19RWMjIwEdXV1wcTERPD19RXu3bsn2Z+TkyN89dVXQoMGDYQ6deoIn3/+ufDkyROpGA8fPhR69Ogh1K5dW9DT0xOmTZsmvH79ukKvJyMjQwAgZGRkVOhxRERE1RHPe7wGqQr79+8XOnXqJOjo6AiampqCvb29sGzZMiE3N1d0TAcHB6m/t2/fLjRp0kRISEgQHB0dRcVcunSpcPHixVL3LV68WFTMli1bSv19/fp1wcLCQjh69KjoPAVBEGxsbIS0tLQS21NTUwVra2vRcYmIKlN5z31KglDxcq6dnR0mTJiASZMmSW3/5ZdfsH79ety6dUsuhZQPJTMzE1paWsjIyOD62URE9NHjeU9x8LOQTdOmTRETEyPVr2L37t2YO3cu8vLyEB8fX4XZ/R97e3ucOXNGaoWSW7duwcvLC5mZmUhNTRUV19raGvfu3Sux/c2bN7CxsUFcXJyouDdu3ICdnZ2oxxIR/ZfynvtE9azw8/PDpEmT8OzZM7i7uwMAwsPDsXLlSgQEBIjLmIiIiIg+avJshAkAHTt2xKFDh9CrVy/JNl9fXygpKWHo0KEy5VqsoKAA0dHRaNy4MbS0tETF+OKLL3Dp0iV07dpVsq158+Y4dOgQpkyZIjq3Hj16oFu3bhg3bhzMzc0BAPHx8fj111/Rs2dP0XFbtmwJe3t7jBkzBkOHDpX0GCEi+pBEjawAgPXr12PJkiVISkoCAFhaWmL+/PkYPny4XBP8EPirBhER1SQ87ymOmvJZvK8RZuvWrUU1wqwsERER8PHxgZKSEvbu3Yvp06cjKysLz549w969e+Hm5lbVKUoIgoDt27cjODhYqsGmj48Phg0bVqIwVF729vZYsGABAgMDceLECfTq1Qtjx46Fp6enPNMnohqqvOc+Uf8Fy8nJwYgRI/D48WM8ffoU169fx6RJk2BgYCA6YSIiIiL6OFVGI0wASE9Pl1OG/8ff3x/h4eEICQmBt7c3li9fjpiYGBw6dAhz5syR2/OMGjVK5hhKSkoYPnw4Dh48iOvXr+P69es4ePAgRowYIbpQAQBqamrw9vbGoUOHEBMTAzs7O4wfPx4WFhZYuHChzHkTEZWHqGkgffr0Qb9+/TBhwgSoqanB09MTampqeP78OVatWoUvv/xS3nkSERERUTWVnZ0NLy8vAMCiRYvQtGlTuLu749ixY1BSUhIdV19fHz169MDYsWPh5eUl0xf0Yvn5+XBwcAAAaGtrS6Y8t2vXDtnZ2aJi+vn5ldi2b98+yfSXVatWiUsWRT8iPnv2DGZmZlLbb968KZflS01NTTFnzhzMmTMH4eHhCAoKkjkmEVF5iPov+pUrV9C5c2cAQEhICAwMDBAfH49t27ZhzZo1ck2QiIiIiKq3V69e4c2bN5K/hw4dioULF8LDw0N0c0mgaBqyq6srZsyYgUaNGmHmzJm4c+eOTLm+naePj4/UvsLCQlExN27ciGfPnkFLS0tyU1JSktwX6+jRozAyMoK9vT1at24t1Wxz2LBhouOqq6uXut3DwwM7d+4UHZeIqCJEFStevXol6WZ89OhR9OvXD8rKynB2dlaYrstEREREpBiKG2G+zdfXF4sXL8aTJ09Ex61bty6mTZuGW7duISQkBM+fP0ebNm3g6uqKbdu2iYrp5OSEzMxMAMCyZcsk2+Pi4kT3Fbl06RLi4+Ohq6uL+fPnY/78+dDW1pbcF2vOnDk4ceIE0tPTMXHiRHh6euLGjRsAZJtec+7cOdGPrQh5TIUhoo+XqGkg1tbW2L9/Pz7//HMcOXIEU6dOBQCkpKR81M2hiIiIiKjiSps6sHHjRowfPx4DBgyQy3N06NABHTp0QEBAAHbt2oVNmzaJavxe1jQHc3NzhIWFicrN1tYWERERWLhwIT755BP8+uuvMk1/Kfb69Wu0bNkSADBmzBhYWFigV69e+Ouvv2SOX1hYiOPHj0s17nRzc4OKioqoeJU5FeZt0dHRuHjxIlq2bIk2bdrIJSYRVQ1RxYp58
+Zh8ODBmDp1Kjw8PODi4gKgaJSFo6OjXBMkIiIiourtwIEDJbbNnz8fRkZGAIDevXuLilva6IG6detizJgxGDNmjKiYxa5evYqHDx9CVVUVzZs3R+PGjaGqKurSGQCgqqqKhQsX4vz58/D29sbLly9lyg8AcnNzkZeXBw0NDQBF0zS2bt2K3r17Iz8/X3TckydPYvDgwTAxMZEsifrw4UMkJSVh586dcHV1rXDMjRs3ol+/frC2tpZsK54KIwsPDw/88ccf0NfXR3BwMKZOnYqOHTti4cKF8Pf3x/jx42WKT0RVR/TSpcnJyXjy5AlatWolaWZ04cIFaGpqolmzZnJNsrLVlGXDiIiIAJ73FElN+SyUlZXh4uIi1Qvh3LlzcHZ2hpKSEiIiIkTFTUtLg46OjrzSBABcv34dQ4YMQUJCArKzs9G8eXMkJibCw8MDgYGBcvmccnJyEBcXBzs7O5nizJ49G66urujevbvU9hMnTmDs2LGi+3e0bNkSQUFBJUYmXLx4EaNHj0Z0dHSFY8bExEhG0kyaNAlAUc+RBw8eiMrx7VyvX78OAHBxccGuXbtgbm6OtLQ0dOnSRbKPiBRHpS5dCgCGhoZwdHSU6rrcrl27aleoICIiIqLKFRgYCKBoqH9kZCQiIyNhaGiIyMhI0YUKAO8tVDRp0kRUzAkTJmD9+vXIyMjAvn370LVrVyQnJ6Np06b4+uuvxaaKvLw87N+/Hz///DMCAwPx7Nkz0bGKLVmypEShAgBcXV1lajSam5tb6hSKtm3bIi8vT1TM4qkwKSkp+OSTTxAfHy+XqTB5eXmSxqeCIEhGgujo6MjUt+Ndc+fOlVssIiof8WPZiIiIiIjKYdSoUXB3d8fYsWPRuXNnzJ49Wy5fVN/3q3lWVpaomK9evUKnTp0AFE1PWbRoEdTV1bF48WLRBZDIyEiMHDkS2trauH37Njp37ox169ahXr162LdvH0xMTETFLfbmzZsSy7a+ePFC0g+ioqysrLBw4UJMmDAB+vr6AIp6061fvx6Wlpai86yMqTCDBg2Cr68vli9fjv79+2PJkiUYMmQIDh8+jMaNG4uKWdrqhuvXr0fDhg0BAJMnT5YpZyIqHxYriIiIiKjSmZub4+jRo1i1ahU6d+4s+hf6tzk4OMDCwqLUX9DFLomqpqaG2NhYNGvWDOfOnUPdunUl+8Q2l5w2bRqOHTsGGxsbXLx4Eb/88gvCwsLw66+/YuLEidi/f7+ouJcuXYKPjw+SkpLQs2dPbNq0SfKF2sPDA1euXBEVd9u2bZg5cyasrKxQUFAAoKjQ4OPjg+3bt4uK+bb27dvj5MmTiIuLkznWggULEBAQgK5du+Lp06coKCjAihUrMGjQIGzevFlUTD8/P3h5eUmN3MnLy0NUVJRcimxEVD6ie1Z8TGrKfFEiIiKA5z1FUlM/i5s3b+LkyZOYMGGCTHEsLS1x+vRpGBsbl9hnamqKR48eVThmaGgohg4dCj09PaSmpiIkJARubm5ITk7GvHnzsGnTpgrHdHBwwNWrVyV/Ozk54fLlywCApk2b4vbt2xWOCQCdO3fGzJkz4ezsjJ9//hkhISE4duwYTExM4OjoiKioKFFx35aWlgbg/VNuZHXt2jW0atVK5jhZWVkoKCgQPaKkWEREBPz9/TF37lz06tULgHz6a7yPvN4Douqg0ntWEBERERGJ0aJFC5kLFUDRNI379++Xus/Ly0tUzE8//RT37t3Dzp07ERcXBzc3NwBF/drEFCoAoF69eoiMjAQAhISESKZWyCo7OxteXl7Q1dXFokWLMHv2bLi7u+PRo0dyGwGgo6MDTU1NREVFISMjQy4x3/XZZ5/JJU79+vVRv359mXN1d3dHWFgYgoODMWrUKGRmZlb6iAp5vQdEHxNOAyEiIiKiaikgIKDMfRs2bBAdV1tbG05OTiW2N2nSRFTjytWrV6Nfv354/vw5jIyM8NdffwEoWl1vyJAhovN89eqVVL+KoUOHQk1NDR4eHjJNs4mIiICPjw+UlJSwd+9eTJ8+HVlZWXj27Bn27t0rKeBURGl9IICippjZ2dkKlSsAaGpqYtu2bZKRNTk5OaJzLFZZ7wHRx4rTQFBzh2ASEVHNxPOe4uBnoXje17Sze/fuePLkiejYqamp0NXVFf34d40ePRr9+vWTTFUoFhwcjKFDhyI/P19U3Pbt22Pjxo1IT09H//79ERwcDHd3d1y4cAHTpk3DyZMnKxxTTU0NQ4YMKXWEQkhIiOiGqJWR67uePn2Ky5cvo2fPnjLFqaz3oLCwEMePH0dCQgIAwMzMDG5ubqJ7rBBVtvKe+ziygoiIiIjo/6uMpp3F3i5UyLJaR7GgoKBStw8YMAADBgwQHTc/Px8ODg4AikaZuLu7AwDatWsnegSAra0t/P390bRp0xL7jh07plC5AqUXAAoLC2UqAFTGe3Dy5EkMHjwYJiYmkmVbHz58iKSkJOzcuROurq6i85W3Fy9eYN++fVLvad++fSu1HwpVb+xZQURERET0/5mbm+PUqVN48OBBiZuBgYGomG9PV3nw4AFatGgBY2NjWFpaIjo6WnSu6enpoh/7Pm/evJHc9/HxkdpXWFgoKubUqVPLHOmxePFiUTGBysn15MmTsLCwwKxZs3D48GEcPnwY/v7+sLCwwIkTJ0TnWhnvwcSJE7Fv3z6cO3cOu3fvxu7du3H+/Hn8+eefmDhxouhc3/Xq1StERUWJHv2xd+9eNGvWDEePHkVOTg5ycnJw5MgRNG/eHHv37pVbnvSREUjIyMgQAAgZGRlVnQoREVGl43lPcfCzUDyTJ08WTp48Weq+8ePHi4rp6OgouT9w4EBh7dq1giAIQkhIiODp6SkqpiAIgpqamtC7d2/hwIEDQmFhoeg47xo1alSp/ybv3bsndOzYUW7PIw+Vkau9vb1w8eLFEtsvXLgg2NnZiYpZWWxsbETt+y/fffed5P7Vq1cFIyMjoWnTpkLDhg2F48ePVzhe06ZNhQcPHpTYfv/+faFp06ai8xQEQcjNzRXWr18v/P3334IgCEJgYKAwdOhQYfny5UJeXp6omNHR0TLlRO9X3nMfe1aA80WJiKhm4XlPcfCzqBlat26NK1euAABatWqFa9euSfa9u6xpRTRt2hRffPEFAgMDkZ6ejuHDh2P06NFo0qSJPNIuoaCgAK9fv0bt2rVFx3i7IWgxeUyJeZcsub6vkarYJqtlmTt3LhYtWiT68T169ICLiwsmTJggWWUmJSUF69evx5kzZ3DkyBFRcd/+N9u7d2+MHj0affv2xblz5zBt2jScPn26QvFsbGxw9+7dUvdZW1vj3r17ovIEgFGjRiE5ORmvXr1C8+bNcfv2bfTv3x9HjhyBvr4+fv311wrHVFZWhr29PcaMGYOhQ4dyqoqcsWcFEREREZECSE9Px99//w1BEPD69WupfbL8bli3bl1MmzYN06ZNw5kzZxAUFIQ2bdrAwcEBY8eOxfDhw2XK++rVq3j48CFUVVXRvHlzNG7cGKqq4r4+XLp0CT4+PkhKSkLPnj2xadMm
NGzYEADg4eEh+WIsq+joaFy8eBEtW7ZEmzZtRMWwsrLCwoULSy0AWFpais6ttNVA1q9fL3kfJk+eXOGY27Ztw8yZM2FlZYWCggIAgKqqKnx8fLB9+3bRub4tISEBffv2BQA4Ozvj1atXFY7Rtm1bjB49GhMmTJD01oiPj8eGDRtEf07FLly4gBs3biA3NxeGhoZISkpC3bp1MW7cODg6OoqK2aJFC8ybNw+BgYGYNWsWevXqhbFjx8LT01OmXKli2LOCiIiIiKgSmZmZYdWqVVi9ejUMDAyQmJgIoOgLsLq6ulyeo0OHDvjtt9/w5MkTjBgxAps2bRId6/r167C3t4ebmxu8vb3h7++PNm3awMfHB5mZmaJiTp06FWvXrkVSUhLs7Ozg6uoqeR9kKdh4eHggJSUFQNEqKJ9++ilCQ0PRv39/bNy4UVTMbdu2IT4+HlZWVqhduzZq164NKysrxMfHy1QA8PPzQ3h4OKKioiS3vLw8REVFiR5d07BhQwQGBiIrKwuJiYlITExEVlYWgoKCJIUWMVJSUrBmzRoEBASU6FPxdp+Q8goMDISlpSVGjx4NGxsb2NjYYMyYMTA3Ny+zUWx5qaqqQklJCbVq1UKtWrVQt25dAEWrr4htiKqmpgZvb28cOnQIMTExsLOzw/jx42FhYYGFCxeKzjU0NFQysio8PBxz587F7t27Rcf72HEaCDgEk4iIahae9xQHP4uarbCwEHl5eahTp46oxzs6OiIqKkrOWRUVPlasWIFOnTrhwIEDOHbsGFauXImFCxfi0aNH2Lp1q8y57tixA4sWLcKxY8fQp08f0SMrWrZsKVlu1sXFBbt27YK5uTnS0tLQpUuX9y5FWx5paWkAIJdpABEREfD398fcuXMly81aWlriwYMHMsV9d+USc3NzuLq6yrRyyahRo6T+Xrp0KYyMjJCYmIiRI0ciLCxMppzlqXfv3mjSpAmysrJw9+5dtGjRAkOGDEFoaCjOnTuH0NDQCsd8exrM28LDwxEUFISdO3dWOOb06dNx5MgRFBQUYPjw4dixYwd69uyJyMhIdOvWDUuXLq1wzOqqvOc+FivACwUiIqpZeN5THPwsaob79+9j7NixePjwIfr27YulS5eiVq1aAIq+YJ89e1ZU3LS0tEqZS/9uH422bdvi4sWLAMT3bGjatCliYmKk+lXs3r0bc+fORV5eHuLj40Xl2rRpU9y6dQsqKipwdnbGuXPnJPvs7e1Fr7by8uVLaGhoQFVVFWlpaYiKikLTpk3RqFEjUfGKZWZmYtKkSVBRUUFAQAAcHBxw//590fGKly41NjaGhYUFAMVduvRtr169wu3bt2FtbY369evLFOv58+dYsmQJlJSUMHfuXPzxxx9Yv349rKys8PPPP0vel4p499+SPDRv3hxXr17Fy5cv0ahRI8THx0NPTw8vX75Eu3btcPPmTbk+nyIr77mP00CIiIiIiCrRl19+CW9vb+zZswfPnz+Hh4eHZGh9bm6u6LjvK1TI0mRTTU0NsbGxAIBz585JhtUDEP1rfceOHXHo0CGpbb6+vli8eDGePHkiOtdBgwbB19cX9+7dQ//+/bFkyRI8fPgQ69evR+PGjUXF3LZtG/T09GBpaYmIiAjY2dnB398fDg4OMg/Z19TUxLZt29CrVy+4ubkhJydHpnjFS5eeP39e7kuXxsbGSqbYxMbGIjAwEOfPnxcVa8aMGZL7165dg7W1NQYNGgQrKyuZloMFAD09PaxevRqrVq1CgwYN8NVXXyE6Ohr79+8XVagAIPdCBQBoaGhAXV0dDRo0gLa2NvT09AAU9Z6R13Swjw0bbBIRERERVaKUlBTJF8dt27Zh6dKl8PDwQFhYGJSUlETHfd8Uh3f7DFTEokWL0KlTJ+jp6SE1NRUhISEAgOTkZHTu3FlUzLL6EgwYMAADBgwQneuCBQsQEBCArl274unTpygoKMCKFSswaNAgbN68WVTMlStXIjY2FhkZGXB1dcWxY8fQpk0b3Lt3D97e3vD19RWdbzFvb2906tQJly9flilObm5uqQ0q27Zti7y8PNFxf/zxR6xcuRIaGhpYunQpZs2aBWdnZyxcuBB+fn6YMmVKheKFhYXhhx9+AFC0Asq6detkWl3kXU+ePMHu3bslDWHt7OwwaNAgaGhoyBS3WEFBAaKjo9G4cWNoaWmJitGgQQOsXbsWGRkZ0NPTww8//IARI0YgNDRUqiBI/4fFCiIiIiKiSvTur+ezZs2Curq61AgLMRwcHGBhYVFqg8rU1FTRcT/99FPcu3cPcXFxsLGxkQzTNjQ0lKlxZ2WZMmUKpkyZgqysLBQUFMi8DKqKiopkxQptbW1JMcDa2rrEsquy0NXVhZGRETIyMkR/Aa6slUu2bNmC2NhYZGdno1mzZrhx4wYsLS3x/PlzdOnSpcLFirfJY3WRtwUHB+O7775Dq1atcObMGXh6euLmzZtYsGABDh8+DFtb2wrHjIiIgI+PD5SUlLB3715Mnz4dWVlZePbsGfbu3Qs3N7cKx9y0aROmT58OZWVl/P3331i3bh1sbGxgZWUlqg9MTcBpIERERERElcjW1rZEk79vv/0WgwcPRlxcnOi45ubmOHXqFB48eFDiZmBgIFPO2tracHJyKjGfXOz0kri4OHTt2hWNGzeGn5+f1PQXFxcXmXItVr9+fZkLFQCgrKyMmzdv4tSpU3j58qXkV//Y2FgUFhaKjhsREQFdXV3o6enh+PHj6NChAwYPHgwrKyscP35cVMzKWrlEQ0MDDRo0gKmpqWRKDFA05UJNTa3C8eS9usjbFi1ahEuXLuGvv/7C+fPnkZubi8OHD2Pjxo2YNGmSqJj+/v4IDw9HSEgIvL29sXz5csTExODQoUOYM2eOqJjW1tbYt28f9u7dCzMzMyxfvhxZWVm4evUqWrVqJSomANy4cUP0YxUdR1YQEREREVWiXbt2lbrdz89PpikFvXv3xv3792FsbFxin5eXl+i4lTG95KuvvkL//v3h7OyMgIAAeHh4IDQ0FPXr15epb0dcXBzGjh2L+Ph4uTUvXbRoEVxdXaGsrIxdu3Zhzpw5ePLkCZ48eSLTyJLiL8Dp6enw9vZGcHAw3N3dceHCBUybNg0nT56scMzipUsDAwPlunKJhoYG/vnnH7x48QJKSkrYvXs3fH19ERkZKapvSbdu3SSrwbi6uuLJkyeS1UVkWWIVKBoJU9z/oXHjxpJmrd27d8c333wjKmZ+fj4cHBwAFBXu3N3dAQDt2rVDdna2TPkWk1eT0ZYtW8Le3h5jxozB0KFDK6XpblVhsYKIiIiIqBK9b968iYmJ6LgBAQFl7tuwYYPouJUxvaSy+nZURhGkZ8+eUq+zS5cuuHr1KkxNTWX6Yl3ZX4CLv6S+ePFC5hEma9aswRdffAFlZWX89ddfWL58OUaMGIF69eohODi4wvHK6h9iYmIi8zKo+vr62Lx5M3r06IEdO3ZIGqsKgoCCggJRMd8e7eHj4yO1T+zomhkzZkj6dly7dg09evSApqYm0tLSEBISInrllhYtWmD
evHkIDAzErFmz0KtXL4wdOxaenp6i4ikSTgMhIiIiIiKJypheUlrfjgEDBsjct6O4COLk5IRt27bBy8sLHh4eyMjIkKkI8jYVFRU0btxY5hEAlfEF+O2C1YMHD9CiRQsYGxvD0tJS9LKtQFGDzqioKFy+fBmtWrXCH3/8gcTERKSkpEiKLBWVl5eH/fv34+eff8batWsRGRkpOr+3rVu3DoGBgbCxscE///yDVatWAQCePXuGmTNniorp5OSEzMxMAMCyZcsk2+Pi4kQvM/12Uaa4yWhsbCwOHDgAf39/UTGBotV7vL29cejQIcTExMDOzg7jx4+HhYUFFi5cKDquImCxgoiIiIiIJIqnl5RG7PSSyurbURlFkKtXr8LBwQGtW7fGzZs34eXlBRMTE5iZmb13isx/qYwvwG83Zpw1axa++uor5OTkYOXKlfDz8xOda3p6eolturq6ohuMRkZGokmTJpg/fz5mzpyJv/76CxMnTkS7du2QmJgoOk+gqBfEqVOnkJWVhcjISJiZmQEoGnExZswYUTGDgoJK/UzMzc1lHgkCyL/JaDFTU1PMmTMHcXFxCAwMxO3bt+US923Xrl2Te8yysFhBREREREQSAQEB6NSpU6n7xE4v2bVrF7p27Vpiu5+fHx49eiQqJlA5RZApU6ZgwYIFmDx5Mnr27ImBAwfi1atXWLNmDb799lvRuVb2F+Bbt25Jptp4e3vj2bNnomPp6+ujT58+OHDggMwNMAFg2rRpOHbsGK5du4aTJ0/CyMgIt27dwrhx4yQ5y9PcuXPlHnPjxo1QVVVF7dq1RT2+spqMqqurl7rdw8MDO3fuFB23LJ999pncY5aFPSuIiIiIiKhSHThwQDL14fnz5xgxYgROnToFR0dHbNu2TXTcymhempmZKfnVe968eRg2bBgAoG/fvliwYIGomEDRaAVtbe0S21VVVaGqKu5rWXp6Ov7++28IgoDXr19L7Sut50h5WVpawtXVFTNnzsSECRMwfPhwjB49WvRqMG/evIGNjQ2AoikmN2/eBACMGzcOK1euFJ0nUNRf413r169Hw4YNAQCTJ0+ucMwDBw6U2DZ//nwYGRkBKBp9VFGV1WT03Llzoh9bltLeU6Do35S8GoyWB4sVRERERERUqZYtWyYpVvj7+8Pe3h6BgYH4/fffMWXKFOzbt09U3Pc1L+3atSvu3LlT4Zhvf8l/dzSILAUAfX199OjRA2PHjoWXl5foKRVvMzMzk/RoMDAwQGJiIkxMTJCSklLmL+7lUbduXUybNg3Tpk3DmTNnEBQUhDZt2sDBwQFjx47F8OHDKxSvXr16iIyMRNeuXRESEiJz/4+3+fn5wcvLS2oVjLy8PERFRYnuW9K3b1+4uLhIvYcZGRlYvXo1lJSURBUrKqvJaFlFMFlMmzYNQ4YMKfX9e7coVplYrCAiIiIiokr19pf8Cxcu4MqVK1BRUYGfn59U34WKqoxlVg0MDJCZmQlNTU2p3J48eSJZFlWM4tEKM2bMwPjx42UerQAA//77b6nbdXV1cfz4cdFx39ahQwd06NABAQEB2LVrFzZt2lThYsXq1avRr18/PH/+HEZGRvjrr78AAMnJyRgyZIhM+R09ehT+/v4YN24cevXqBaDofSmrOFAegYGB+O2337Bq1So4OjoCKPr8ZGkKeuPGDdjZ2Yl+fFkaNmyInj17yrUIZmtrC39/fzRt2rTEvmPHjskcv7yUBFnKgx+JzMxMaGlpISMjQ3RzGyIiouqC5z3Fwc+CagpbW1sEBwdDEAQMHz4cV69elexzcHCQ+rsilJWVy1xmNTExEfn5+SIzLikzMxPp6emSBo4V1bp1a1y5cgUAJKMVgoODRY9WqEyOjo6SKQvylJqaCl1dXbnHzczMxKRJk6CiooKAgAA4ODiU2SS2vOLj4zF27Fh07twZs2fPho2NjUwxlZWVYW9vjzFjxmDo0KFSI0Fk0bRpU3zxxRcIDAxEenq6XIpgmzdvRps2bWBvb19i39atWzFixAhZUi73uY8NNomIiIiIqFLl5OSgT58+6NOnDzIyMvD48WMARUPrZfkluDKWWd2zZ4/k/vPnz+Hl5QUtLS1RQ//L0qFDB/z222948uQJRowYgU2bNomKExcXh65du6Jx48aYOnUqcnNzJftcXFxE5xceHi76se9TWqEiNTVV5riamprYtm0bevXqBTc3txKrxIhhbm6Oo0ePom7duujcuTPy8vJkiteiRQvMmzcPoaGhMDMzw8CBA+UySqF4ys6tW7cQEhKC58+fo02bNnB1dRXdD2bUqFGlFioAyFyoqAgWK4iIiIiIqFI9fPgQ9+/flxQSGjVqBABQU1PD3r17RcetjGVW315WtLi/xu3bt9G7d29MmTJFVEyg9H4XdevWxZgxY3Dq1ClRMb/66iv0798fe/bsQWpqqtSSrW8XLiqqtF/9X7x4ITre+xRPs5AHb29vhIaGIjAwUC7xlJSUMG3aNPz6668yrzCipqYGb29vHDp0CDExMbCzs8P48eNhYWGBhQsXyiVfeRXBgKK+H/v378fPP/+MtWvXyjQFRixOAwGHYBIRUc3C857i4GdBpHjengLRqlUrSX+N4r+vXbsmKm5aWprchv4Xe3e6xtKlS7F//36EhYWha9eukmknFXX16lWMHDkSysrK2L59O7777jtERkZCT08PBw8eRMuWLSsUr7TVNYqNHTsWKSkpovKsTt6eBvS28PBwBAUFiV5mtDKm7ERGRmLkyJHQ1tbG7du30blzZyQmJqJevXrYt28fTExMZIpf3nMfG2wSERERERH9f7m5uYiOjoYgCFBSUpIUKgCIXl0CKH20QrEmTZqIWrnk3ekOs2bNgrq6utQICzGmTJmCBQsWID09HT179sTixYvxzz//YP/+/fj2229x9OjRCsX7/PPP4ebmVuroElnyBIqmwowdOxbx8fHo27cvli5dKmmE6uLigrNnz8oU/11iP6uyVmfx8PCAh4eH6HwqY8rOtGnTcOzYMdjY2ODixYv45ZdfEBYWhl9//RUTJ07E/v375f6cpWGxgoiIiIiI6P8r7q9R/MX68ePHaNSokcz9NSpj5RJbW1uEhobi008/lWz79ttvoaysjG+//VZUTKDol+++ffsCAObNm4dhw4YBKFrSc8GCBRWOZ2Njg6CgIFhYWJTYZ2pqKjpP4P+mwjg7OyMgIAAeHh4IDQ1F/fr1RU+FqYzP6ty5c6Ie91/kPVoHAN68eQMbGxsAQNu2bXHz5k0AwLhx47By5Uq5P19ZWKwgIiIiIiL6/x4+fFjqdln7azg4OJS5conYJpO7du0qdbufnx98fX1FxQSk+2t07dq1zH3lNWLECDx//rzUYsWECRMqHO9tKSkpmDhxIgBg27ZtWLp0KTw8PBAWFiZ6JExlfFYAUFhYiOPHjyMhIQEAYGZmBjc3N6nRO/IkdhRIvXr1EBkZia5duyIkJAT6+vqVkN1/U5gGm8uXL4eSkhK++eYbybbc3FxMnDgRurq6qFevHry9vfH06VOpxyUkJMDLywt16t
SBvr4+pk+fjoKCgg+cPREREVVnvA4hov9Sp04dWFpain58ZaxcoqGhAQ0NjVL3vVtkqAgDAwNkZmYCKFqqstiTJ08kUywqwt/fH23atJHatnHjRgDA7NmzRecJlD4VZsCAATJNhamMz+rkyZOwsLDArFmzcPjwYRw+fBj+/v6wsLDAiRMnRMUEikaBlHUT+/pXr16N4cOHo3bt2vjuu++wYsUKAEBycjKGDBkiOteKUoiRFRcvXsTGjRtLNGqZOnUq/vnnH+zZswdaWlqYNGkS+vXrh9OnTwMoqkx5eXnB0NAQZ86cwZMnTzB8+HCoqalh6dKlVfFSiIiIqJrhdQgRfQjFK5cYGxuX2Cd25ZLKmK4AAEeOHCl1e506dRASElLheKU12Jw/fz6MjIwAQKZlYStjKkxlfFYTJ07Evn37ShRtLl68iNGjRyM6OlpU3MoYBdK2bVs8evQIqampUsvNGhoaYt68eaJiilHlq4FkZ2ejdevWWLduHRYvXgwHBwf8/PPPyMjIQMOGDfH777+jf//+AIDY2FjY2tri7NmzcHZ2xuHDh9GrVy8kJSVJKlwbNmzAjBkz8OzZszKbmLyLnbiJiKgm4Xnv/1T1dQg/CyKShbKycplfVBMTE5Gfny/35xQztUBZWRkuLi5S/108d+4cnJ2doaSkhIiICNH55OXlAUCpI0wSExNlXrlCXt73vomdrgEAlpaWOH36dKmFFVNTUzx69EhU3He9ePECDRo0kEus8p77qnwayMSJE+Hl5QVPT0+p7ZcvX8br16+ltjdr1gxmZmaSjq5nz56Fvb291FCc7t27IzMzU9IEpDR5eXnIzMyUuhEREVHN86GvQ3gNQkTyVBnTFQD5Ty0IDAwEAKxatQqRkZGIjIyEoaEhIiMjZSpUAJUzFWbPnj2S+8+fP4eXlxe0tLTQpUsXSb+JirKyssLChQullmlNSUnB999/L9P0ouJRIKUROwokICBAcv/Bgwdo0aIFjI2NYWlpKXoEiBhVOg1k165duHLlCi5evFhiX3JyMtTV1aGtrS213cDAAMnJyZJj3v0/YfHfxceUZtmyZfj+++9lzJ6IiIiqs6q4DuE1CBHJU2VMVwDkP7Vg1KhRcHd3x9ixY9G5c2fMnj1bpmVg31YZU2GWLVsGHx8fAEX9Nuzt7REYGIjff/8dU6ZMwb59+yocc9u2bZg5cyasrKwkvY1UVVXh4+OD7du3i8oTkC4svGvDhg2iYm7duhVTpkwBUNQD5KuvvsLEiROxd+9e+Pn5ISwsTFTciqqyYsWjR48wZcoUhIWFiWrSIgt/f3/4+flJ/s7MzJR5yRwiIiKqPqrqOoTXIEQkT5XxRRX4vxEbZU0tEBvz6NGjWLVqFTp37iyZviGryujZ8HasCxcu4MqVK1BRUYGfn59Uw9GKaNiwIQIDAxEYGIi0tDQAQHBwsMyrobzr2bNniI6Ohq2traQniCxu3bqFP/74AwDg7e2NRYsWyRyzvKqsWHH58mWkpKSgdevWkm2FhYU4ceIE1q5diyNHjiA/Px/p6elSv2o8ffoUhoaGAIoafFy4cEEqbnGX7uJjSvO+oUJERET08auq6xBegxBRdVBZIzaUlJQwbdo0fPrppzh58qQsKUpURmElNzcX0dHREAQBSkpKUkuLih0RUlqT0QULFsDExASCIIhuMjp8+HCsXLkS+vr6iIiIgK+vLywtLfHw4UNs2rQJffv2rXDM9PR0/P333xAEAa9fv5ba9yFbXlZZscLDw6PEfJdRo0ahWbNmmDFjBkxNTaGmpobw8HB4e3sDAG7fvo2EhAS4uLgAAFxcXLBkyRKkpKRI1n4NCwuDpqYmmjdv/mFfEBEREVUbvA4hIipbZY3YKNaiRQu0aNFC5jhA5RRWcnJy0KdPH8kX88ePH6NRo0bIyMiAsrK4to99+/Yt0WQ0IyMDq1atgpKSkuhixbVr1yTnoO+//x5hYWFwcHDAgwcP0K9fP1HFCjMzM6xatQpA0fTG4kalKSkp5V7EQh6qrFhRv3592NnZSW2rW7cudHV1JdvHjBkDPz8/6OjoQFNTE19//TVcXFzg7OwMAPjkk0/QvHlzDBs2DCtWrEBycjLmzJmDiRMn8lcLIiIiKhOvQ4iIPg6VUVh5+PBhqdvV1NSwd+9eUTEDAwPx22+/YdWqVXB0dARQtJJHZGSkqHjFcnJyJPdfvXoFBwcHSezCwkJRMf/9999St+vq6uL48eOiYopR5auBvM/q1avRq1cveHt7w9XVFYaGhvjzzz8l+1VUVHDw4EGoqKjAxcUFQ4cOxfDhw7Fw4cIqzJqIiIg+BrwOISKit9WpU0f0yh2jRo3C77//ju+++w4LFy5EYWGhXJqMdu/eHVOmTEF2djY8PT2xc+dOCIKAw4cPQ09PT+b4b1NRUUGdOnXkGvN9lIQPOelEQXGNcyIiqkl43lMc/CyIiGoWQRCwatUq7N27F/Hx8UhMTJQpXn5+PmbMmIGgoCDo6OggPj4eKioq8PDwwPr160UVV+Li4jB27FjEx8ejT58+WLZsmaQZtYuLi2QJb7HKe+5jsQK8UCAiopqF5z3Fwc+CiKhmunnzJk6ePCm31UBevXqFuLg4FBQUwNzcHDo6OqJjde/eHb1794azszMCAgIQFxeH0NBQ1K9fH46OjoiKipIp1/Ke+xR6GggRERERERHRx6ZFixZyKVTs2bMHQNEUFSMjI8yZMweWlpbo2rUrEhISRMVMSUnBxIkT4eTkhG3btsHLywseHh7IyMiQy9SV8mKxgoiIiIiIiKgaWrZsmeS+v78/7O3tcfv2bXz22WeYMmWKqJhvN+0EgFmzZmHAgAHw8PBAVlaWTPlWBIsVRERERERERNXQ210dLly4gCVLlsDQ0BB+fn64f/++qJi2trYIDQ2V2vbtt99i8ODBiIuLkynfiqiypUuJiIiIiIiISLzc3FxER0dDEAQoKSlBRUVFsk/slI1du3aVut3Pzw++vr6iYorBYgURERERERFRNZSTk4M+ffpIRlg8fvwYjRo1QkZGBpSVxU2k0NDQKHNf165dcefOHVFxK4rFCiIiIiIiIqJq6OHDh6VuV1NTw969e0XFvH79epn7PmTPChYriIiIiIiIiD4iderUgaWlpajHOjg4wMLCQqofRrHU1FRZUys3FiuIiIiIiIiICABgbm6OU6dOwdjYuMQ+U1PTD5YHVwMhIiIiIiIiIgBA7969y1xJxMvL64PlwZEVRERERERERAQACAgIKHPfhg0bPlgeHFlBRERERERERAqFxQoiIiIiIiIiUigsVhARERERERGRQmGxgoiIiIiIiIgUCosVRERERERERKRQWKwgIiIiIiIiIoXCYgURERERERERKRQWK4iIiIiIiIhIobBYQUREREREREQKhcUKIiIiIiIiIlIoLFYQERERERERkUJhsYKIiIiIiIiIFAqLFURERERERESkUFisICIiIiIiIiKFwmIFERERERERESkUFiuIiIiIiIiISKGwWEFERERERERECoXFCiIiIiIiIiJSKCxWE
BEREREREZFCYbGCiIiIiIiIiBQKixVEREREREREpFBYrCAiIiIiIiIihcJiBREREREREREpFBYriIiIiIiIiEihsFhBRERERERERAqFxQoiIiIiIiIiUigsVhARERERERGRQmGxgoiIiIiIiIgUCosVRERERERERKRQWKwgIiIiIiIiIoXCYgURERERERERKRQWK4iIiIiIiIhIoVRpsWL9+vVo2bIlNDU1oampCRcXFxw+fFiyPzc3FxMnToSuri7q1asHb29vPH36VCpGQkICvLy8UKdOHejr62P69OkoKCj40C+FiIiIqhFegxARESm2Ki1WNGrUCMuXL8fly5dx6dIluLu7o0+fPrh58yYAYOrUqfj777+xZ88eHD9+HElJSejXr5/k8YWFhfDy8kJ+fj7OnDmDrVu3YsuWLZg3b15VvSQiIiKqBngNQkREpNiUBEEQqjqJt+no6ODHH39E//790bBhQ/z+++/o378/ACA2Nha2trY4e/YsnJ2dcfjwYfTq1QtJSUkwMDAAAGzYsAEzZszAs2fPoK6uXq7nzMzMhJaWFjIyMqCpqVnhnNPS0vDmzRvo6enhxYsX+Pfff9G8eXM0bdq0wrGIiIgqm6znvY9VdbwGISIiqm7Ke+5TmJ4VhYWF2LVrF16+fAkXFxdcvnwZr1+/hqenp+SYZs2awczMDGfPngUAnD17Fvb29pKLBADo3r07MjMzJb+MlCYvLw+ZmZlSN7F2794NS0tLWFlZYffu3XBzc8OmTZvQpUsX/Pnnn6LjEhER0YdRXa9BiIiIPmaqVZ1AdHQ0XFxckJubi3r16mHfvn1o3rw5rl69CnV1dWhra0sdb2BggOTkZABAcnKy1EVC8f7ifWVZtmwZvv/+e7nk/8MPPyAmJgZZWVlo06YNzpw5A3t7e8TFxWHQoEFSQ0Yr4saNG7Czs5NLjkRERFRSdb8GISIi+phV+ciKpk2b4urVqzh//jy+/PJLjBgxArdu3arU5/T390dGRobk9ujRI9GxBEGAsbExmjZtChMTE9jb2wMArKys8Pr1a9FxW7ZsiVatWmHNmjVIS0sTHYeIiIhKV92vQYiIiD5mVV6sUFdXh7W1NZycnLBs2TK0atUKAQEBMDQ0RH5+PtLT06WOf/r0KQwNDQEAhoaGJTpzF/9dfExpNDQ0JN2/i29iFRYWSu5PnDhRap8sHcFbtGiBefPmITQ0FGZmZhg4cCCOHTsmOh4RERFJq+7XIERERB+zKi9WvOvNmzfIy8uDk5MT1NTUEB4eLtl3+/ZtJCQkwMXFBQDg4uKC6OhopKSkSI4JCwuDpqYmmjdv/kHy7d27t2S+6ddffy3ZHhMTAwsLC9Fx1dTU4O3tjUOHDiEmJgZ2dnYYP348LCwssHDhQlnTJiIiondUt2sQIiKij1mVrgbi7++PHj16wMzMDFlZWfj999/xww8/4MiRI+jWrRu+/PJLHDp0CFu2bIGmpqakGHDmzBkARaMaHBwcYGxsjBUrViA5ORnDhg3D2LFjsXTp0nLnoYiduFu3bo0rV66U2B4eHo6goCDs3LmzCrIiIqKPgSKe9z40XoMQERFVjfKe+6q0wWZKSgqGDx+OJ0+eQEtLCy1btpRcJADA6tWroaysDG9vb+Tl5aF79+5Yt26d5PEqKio4ePAgvvzyS7i4uKBu3boYMWLEBx15UFmNMMta8szDwwMeHh5yfz4iIqKa5GO4BiEiIvqYVenICkUhy68aysrKsLe3x5gxYzB06FDo6OhUUpZERETywV/zFQc/CyIiqmnKe+5TuJ4V1c2HaoRZUFCAqKgoZGRkyD02ERERERERkSJhsUJGldUIMyIiArq6utDT08Px48fRoUMHDB48GFZWVjh+/LgcXwERERERERGRYmGxQo5MTU0xZ84cxMXFITAwELdv3xYdy9/fH+Hh4QgJCYG3tzeWL1+OmJgYHDp0CHPmzJFj1kRERERERESKpUobbH4MKqsRZn5+PhwcHAAA2tracHd3BwC0a9cO2dnZouMSERERERERKTqOrJDRuXPnKiXumzdvJPd9fHyk9hUWFlbKcxIREREREREpAhYrKlFqaqroxzo5OSEzMxMAsGzZMsn2uLg4dgsnIiIiIiKijxqLFZXI0dFR9GODgoJKLUqYm5sjLCxMlrSIiIiIiIiIFBp7VsjowIEDZe7Lzc2V63Nt3LgR48ePh6oqPzYiIiIiIiL6ePFbr4w+//xzuLm5QRCEEvuysrJExy2tCDJ//nwYGRkBAHr37i069ps3b6CsLD2o5sWLF2jQoIHomERERERERETywmKFjGxsbBAUFAQLC4sS+0xNTUXH7du3L1xcXKRWG8nIyMDq1auhpKQkqlhx6dIl+Pj4ICkpCT179sSmTZvQsGFDAEWrl1y5ckV0vkRERERERETywp4VMhoxYgSeP39e6r4JEyaIjhsYGAgAWLVqFSIjIxEZGQlDQ0NERkYiIiJCVMypU6di7dq1SEpKgp2dHVxdXZGYmAgApY4MEaugoABRUVHIyMiQW0wiIiIiIiKqOViskJG/vz/atGlT6r7Zs2eLjjtq1Cj8/vvv+O6777Bw4UIUFhZCSUlJdDwAyM7OhpeXF3R1dbFo0SLMnj0b7u7uePTokUyxIyIioKurCz09PRw/fhwdOnTA4MGDYWVlhePHj8uUMxEREREREdU8LFZUgo0bN8oljrm5OY4ePYq6deuic+fOyMvLkyneq1ev8ObNG8nfQ4cOxcKFC+Hh4SHTMqv+/v4IDw9HSEgIvL29sXz5csTExODQoUOYM2eOTDkTERERERFRzcOeFTKqzEaYAKCkpIRp06ahe/fuOHXqlEyxOnbsiEOHDqFXr16Sbb6+vlBSUsLQoUNFx83Pz4eDgwMAQFtbG+7u7gCAdu3aITs7W6aciYiIiIiIqOZhsUJGldEIszR2dnYwMTGRKUZQUFCp2wcMGIABAwaIjvv2aA0fHx+pfYWFhaLjfkgbNmyQqccIERERERERyQ+ngcioMhphAsDVq1fh4OCA1q1b4+bNm/Dy8oKJiQnMzMxw/fp1eaUvIcs0ECcnJ2RmZgIAli1bJtkeFxcHTU1NmXMDiqawREVFybQc7PssXbq0UuJeu3atUuISERERERF9zJQEeS4DUU1lZmZCS0sLGRkZor5cx8fHY+zYsejcuTNmz54NGxsb3L9/X6ac3NzcMHXqVKSnp2P+/PlYvHgxhg0bhv3792PdunU4evSoTPHfZWZmhoSEBLnGLCgowOvXr1G7du0KP3bGjBn44YcfABR94e/Rowc0NTWRlpaGkJAQuLq6Vjhmv379St0uCAKOHj2Kly9fVjjmf6mM95WISFaynvdIfvhZEBFRTVPecx+ngchBcSPMVatWyaURJlD0Afbt2xcAMG/ePAwbNgxA0bSTBQsWiIpZWn+NYrm5uaJiliY6OhoXL15Ey5Yty1wp5b+EhYVJihVz587FunXr0LdvX5w7dw7Tpk3D6dOnKxzzyJEj+Pnnn6Wm7ABFxYqTJ0+KyhMA1qxZU+p2QRDYs4OI
iIiIiEgEFivkpLgR5qeffirTF99ibw946dq1a5n7KuLzzz+Hm5tbqY+XZXqFh4cH/vjjD+jr6yM4OBhTp05Fx44dsXDhQvj7+2P8+PGiYwNAQkKCpHDj7OyMV69eiYrj4OAAR0fHUgsoc+fOFZ3ftGnTMGTIkFKXf339+rXouERERERERDUVixVy1qJFC7Ro0QIA0KRJE9y5c0dUHAMDA2RmZkJTUxNbt26VbH/y5Alq1aolKqaNjQ2CgoJgYWFRYp+pqamomADw7Nkz6OvrAwBWr16NM2fOwNzcHGlpaejSpYuoYkVKSgrWrFkDQRBKFFLebuhZEQEBATA2Ni51X3h4uKiYAGBrawt/f380bdq0xL5jx46JjgsAoaGhMDIyQqtWrRAeHo5///0XdnZ28PX1lSkuERERERGRImOxQkbva3Ypy2iFI0eOlLq9Tp06CAkJERVzxIgReP78eanFCllWwsjLy0NhYSFUVFQgCALMzc0BADo6OqJHgXTr1g1RUVEAAFdXVzx58gRGRkZITEyUFEYq6n1TUpo0aSIqJgBMnToV+fn5pe5bvHix6LjTp0/HkSNHUFBQgOHDh2PHjh3o2bMnVq5ciWvXrsmlKeizZ88QHR0NW1tbyXK7FZWQkAB9fX3UqlULgiBg/fr1OHfuHFq1aoUpU6ZAVZX/mSEiIiIioophg03I1txKWVkZFhYWpX4pT0xMLPNLrCxkGbFRGRYsWIAbN25g+fLl2L9/P/Ly8jBkyBAcPnwYoaGh+Ouvv6o6xTJ169YNYWFhVZ1GqZo3b46rV6/i5cuXaNSoEeLj46Gnp4eXL1+iXbt2uHnzZoVjDh8+HCtXroS+vj4iIiLg6+sLS0tLPHz4EJs2bZJMt6mIli1b4uzZs6hbty7mzZuHM2fOoF+/fjh69ChMTEzwv//9r8Ixiahysamj4uBnQURENQ0bbH4g5ubmOHXqVKnTC2SZWlFZIzbKkpqaCl1dXVGPXbBgAQICAtC1a1c8ffoUBQUFWLFiBQYNGoTNmzfLlNebN2+grCy9wu6LFy/QoEGDCscqbTWQs2fPSrb/+eef4pJ8x6tXr3D79m1YW1ujfv36ouNoaGhAXV0d6urq0NbWhp6eHgCgbt26JZqElte1a9ckI1O+//57hIWFwcHBAQ8ePEC/fv1EFSsEQUDdunUBAP/88w9OnjyJOnXq4IsvvkDr1q1F5VkaeTRuJSIiIiKi6kH5vw+h9+ndu3eZy5R6eXmJjuvg4IC+ffuiT58+JW6pqami45bF0dFRpsdPmTIFjx49QmpqKlJTU5GRkYENGzZAR0dHVLxLly7B0tIStWvXxueff45nz55J9nl4eIiKeebMGejq6krex969e6N27dqSv8WaMWOG5P61a9dgbW2NQYMGwcrKCidOnBAdt0GDBli7di2WLFkCPT09/PDDD0hOTsaWLVskxYGKysnJkdx/9eoVHBwcAACWlpYoLCwUFVNJSQlPnz4FANSvX18y7UNFRQUFBQWiYgJFn3NKSgoAIDg4GJ9++ilCQ0PRv39/bNy4UXRcoPS+Jy9evJApJhERERERyQ+LFTIKCAhAp06dSt23YcMG0XGLR2w8ePCgxM3AwEBUzAMHDpR5k9fSpfXr10eDBg1k/jI5depUrF27FklJSbCzs4OrqysSExMBiF8N5fr160hNTUV0dDQGDhyIkSNHon79+hgxYgRGjBghOte3p5EUL7MaGxuLAwcOwN/fX3TcTZs2ITw8HFeuXMHff/+NFy9ewMbGBj///LPoqRXdu3fHlClTkJ2dDU9PT+zcuROCIODw4cOSkRsVNX/+fHTt2hW//fYbOnfuDG9vb2zZsgVDhgyRqWBXWuPW4OBgXLlyRfTrr4wiGBERERERyR+ngSio4hEbpU0vEfsFsLKWLj1w4ECJbfPnz5c0bOzdu3eFY2ZnZ0te56JFi9C0aVO4u7vj2LFjpS4RWh76+vr4888/ERQUBFdXV6xdu1ZUnPeR1zKrAGBtbY19+/ZJ/l6+fDmWL18uU34//fQTZsyYARMTE+jo6CA+Ph4jR46Eh4cHAgMDRcX09vZG48aNsWrVKty6dQsFBQXYu3cvBg8ejIEDB4rOtTIatxYXwZydnfHzzz/D1dUVx44dg4mJieiYpeGUFSIiIiIi2bDBJmpOc6tmzZohNDS0zKVLHz16JCqusrIyXFxcpPoonDt3Ds7OzlBSUkJERESFYzZt2hQxMTFS/Sp2796NuXPnIi8vD/Hx8aJyLfbgwQOMGTMG169fx/Pnz2WK1ahRI3z33XcQBAFr1qxBXFycZF+rVq1w7do1meK/be7cuVi0aJHMcV69eoW4uDgUFBTAzMxMdL+SylQZjVsdHR0lq8wAwI4dO7Bo0SIcO3YMffr0wZUrV0Tl6uHhgT/++AP6+voIDg7G1KlT0bFjR1y4cAH+/v6ilu9du3YtfH190bBhQ1E5Eb1PTTnvVQf8LIiIqKZhg00qobKWLg0MDMRvv/2GVatWSXpfWFpaIjIyUnTMjh074tChQ+jVq5dkm6+vL5SUlDB06FDRcYtZWloiIiICmZmZMseqjGVWAWDNmjUltq1fv17y5XXy5MmiYycnJ0t6n2RkZFRaseLgwYNSn2FFVEbj1levXkk1bR06dCjU1NTg4eGBvLw8UTGB0qesmJubIy0tDV26dBFVrJg+fTpmzJiB7t27Y+zYsejRo4foUUXlde3aNbRq1apSn4OIiIiIqDw4sgL8VUMe4uPjMXbsWHTu3BmzZ8+GjY1NmY1HFcmoUaNkXrGksqiqqsLLy0uqSWlISAj69+8PJSUlBAUFVThmTEwMRowYgUePHsHMzAxA0dQVU1NTbN68GS1atJBb/gBgZmaGhIQEmeNkZWWhoKBA1Cowbxs9ejT69etXooASHByMoUOHil5quGnTprh16xZUVFTg7OyMc+fOSfbZ29sjOjq6wjEdHR0RGhqKLVu2YPPmzcjOzsaIESMwevRoWFlZicrzv8jyecXGxkJHRwf6+vqIjY3F6dOnYWdnh/bt28s5S5IHnvcUBz8LIiKqacp77mOxArxQkBdBELBq1Srs3bsX8fHxkoaYYr18+RIaGhpQVVVFWloaoqKi0LRpUzRq1EhUPD8/vxLbgoKCMHr0aADAqlWrROdaGV/UIiIi4O/vj7lz50q+XFtaWuLBgweiY7Zv3x7fffcdvL29pbaHhIRgxYoVuHDhQoVjlva+AkX/HoKCgpCRkSEq13cVFBQgOjoajRs3hpaWllxiyktlTFlp3bq11LSUkydPIigoCCEhIWjTpo3okUuljdgBij6v77//HmlpaRWO+eOPP2LlypXQ0NDA0qVLMWvWLDg7O+P8+fPw8/PDlClTROVaGnlNharpeN5THPwsiIiopinvuY+rgdQgcXFx6Nq1Kxo3bgw/Pz+pFUBcXFxkjq+kpIRp06Zh06ZNmDt3rkyxtm3bBj09Pcl0DTs7O/j7+8PBwQG7d+8WFXPjxo149uwZtLS0JDclJSXJfbF+/PFHuLm5oU2bNtixYwc++eQTHDl
yBAMGDEBAQIDouO7u7ggLC0NwcDBGjRqFzMxMmacBpKenlyhUAED//v1FFxXWrVuHevXqSb2vWlpa0NbWlinfiIgI6OrqQk9PD8ePH0eHDh0wePBgWFlZ4fjx46Ljvi06OhpBQUG4dOmSTHEWLFiAzp07o2vXrpg1axbmzp0r6VciduTOu3Xkzp07Y/PmzUhMTMTgwYNF5zpt2jRcuXIFUVFRUrerV6/i9evXomJu2bJFUqQbP348jh8/juDgYFy+fBm//vqr6FzXrFlT4rZ+/XrJfTEOHjwo05K6RERERPSBCCRkZGQIAISMjIyqTqVSffLJJ8LatWuFS5cuCcOGDRM6dOggZGZmCoIgCA4ODnJ9rrS0NJkeb29vLzx8+FC4du2aoKWlJVy8eFEQBEG4e/eu0LJlS1Exb926JXTu3Fn45ZdfJNssLCxkylMQBKF58+ZCWlqakJCQINSpU0e4f/++IAiC8OzZM6FFixYyxxcEQQgJCREcHBwEQ0NDmeJ06NBB2LZtm1BYWCjZVlhYKGzZskVwcXERFdPJyUm4fv16qfsaNWokKqYgCEK7du2EqKgoITIyUtDV1RXCw8MFQRCE8+fPC506dRIV093dXXj69KkgCIKwe/duwdjYWPDx8RHMzc2FDRs2iM71bZmZmTL/+xcEQZg4caIcsinJ3t5eiI2NLXWf2M/L0dFRct/MzExqnyz/bVFRURF69+4tjBw5UnKrV6+eMHLkSGHUqFGiYiorKwsNGzYUpk6dKty4cUN0bu+Kj48XcnJyBEEQhDdv3gj/+9//hGHDhgkrV64UXr9+LbfnkZeact6rDvhZEBFRTVPecx9HVtQgKSkpmDhxIpycnLBt2zZ4eXnBw8MDGRkZMv0CfvXqVTg4OKB169a4efMmvLy8YGJiAjMzM1y/fl1UTBUVFZibm6Nly5bQ1taWLP9obW0ttUJIRdja2iIiIgIpKSn45JNPEB8fL5eGhRoaGmjQoAFMTU0lo0EAQE9PD2pqajLHB4qWBw0NDRW9vGixrVu3YsuWLdDR0YGtrS1sbW2ho6Mj2S7GwoULUbt27TKfT6z8/Hw4ODigS5cu0NbWhru7OwCgXbt2yM7OFhWztEaYwcHBuHLlCv73v/+JzrXY1atXER4ejtOnT8s0XQdApSytCxQt31pWb47FixeLiqmhoYF//vkHO3bsgJKSkmT0U2RkJFRUVETnevToUSQnJ8Pb2xubN2/G5s2boaenh82bN4vq2QIALVu2xOHDh5Gbm4tOnTrB2dkZv/76q+h/U8V69eqFwsJCAEVLN//5559wdnbGyZMn5ToNhoiIiKjG+EDFE4VWU37VaNq0aYltP/74o+Dk5CRYW1uLjuvq6irs27dP2Lx5s2BmZiZs27ZNEARB2Ldvn9CtWzdRMVu3bi3cuHFDOHnypKCnpyecOnVKEARBiImJEezt7UXnWuzcuXOCk5OToK+vL3MsZ2dn4eDBg8L27dsFc3NzYdeuXYIgCEJERITg5OQkc/y3yevX/5SUFOHy5cvC5cuXhZSUFLnElLe3R9DMnDlTap/YfwNNmjQRCgoKBEEQhPbt20vts7OzExVTEATh2rVrgp2dnaCpqSkoKysLdnZ2QoMGDYT+/ftXyn9X/v77b7nHlMWFCxcEBwcHoXXr1sLVq1eFgQMHChoaGlIjYsTKyMgQhg0bJowcOVLIyMgQLC0tZYr39iiQnJwcYdu2bUKXLl2E+vXrix6tIQjS/35at24tvHz5UhAEQXj9+rVc/pslbzXlvFcd8LMgIqKaprznPhYrhJpzodC3b1/h8OHDJbb/9NNPgpKSkui4bw/zNjU1ldrXqlUrUTH/+ecfQUdHR9DT0xOOHTsmdOnSRWjatKmgqakpKQbI6tWrV0J0dLTMcSrri9pff/1V4mZgYCC5r0iSkpKE1atXC1OmTBGmTZsmBAUFCbm5uTLFHDVqVKn/n7x3757QsWNHUTHnz58veHt7C3fv3hV+/PFHYfHixcKDBw+EdevWCb179xadq4uLi3Dy5ElBEIo+t6+//lrIy8sTZs+eLQwfPlx03LK8+/+zioqJiZFMh4mJiRF+++034dy5c/JITeL58+dS041kJa+pUG8XK9529+5dYdasWaLj2tvbC8nJyYIgCIKbm5uQl5cnCELRlBBbW1vRcV+9eiXEx8eX2C7rFJaact6rDvhZEBFRTVPecx9XA0HN6cSdl5cHoGjI9rsSExNhYmIiKq6DgwOuXr0KABgxYoTU0P/iJoOyKiwsxNWrV2FqaioZxi/WmzdvSkwlefHihczLYr4tNTUVDRo0ED1lBQCUlZXh4uICdXV1ybZz587B2dkZSkpKiIiIkEeqEk2aNMGdO3cq/Ljg4GB89913aNWqFc6cOQNPT0+kp6cjJiYGhw8fhq2trVzzLCgowOvXr8ucevJfAgICsHLlSjx9+hQFBQWoX78+Bg0ahKVLl0otE1sRb/9/AADatm2LixcvAhD/vlbWKisfYuWO6OhoXLx4Ea1atYKTk5PM8Yo9ffoUly9fRs+ePUXH6NevH/7880+55VRs7969mDt3Lvz8/BAfH4+rV6/C29sbR48ehYmJCX788ccKxzx69CgGDBgAQRBgZWWF4OBgWFtbAyi5WkxF1ZTzXnXAz4KIiGoargZCJWhoaJRaqACArl27io5rYGCAzMxMANI9Cp48eYJatWqJjvs2FRUVODk5yVSouHTpEiwtLVG7dm18/vnnePbsmWSfh4eHPNKU0NXVhbKyMlJTU0XHKO5PsWrVKkRGRiIyMhKGhoaIjIwUXai4fv16mbesrCxRMRctWoRLly7hr7/+wvnz55Gbm4vDhw9j48aNmDRpkqiYZdm4cSNUVVVFFyoAYMqUKXj06BFSU1ORmpqKjIwMbNiwQXShAgDU1NQQGxsLoKigVLduXck+sT0bKmuVlcpYucPDwwMpKSkAiopXn376KUJDQ+Ht7Y2NGzeKzvVdKSkpSE5Olmn1lsooVABFfWV27tyJ48eP49ChQ0hISMDevXvx2WefYcWKFaJizpkzBydOnEB6ejomTpwIT09P3LhxA0DJ1WKIiIiIPjaqVZ0AfTjva3Yp9osqABw5cqTU7XXq1EFISIiomHv27IGPjw8A4Pnz5xgxYgROnToFR0dHbNu2DWZmZhWOOXXqVKxduxbOzs74+eef4erqimPHjsHExKTSLvwdHR2RkJAg6rGjRo2Cu7s7xo4di86dO2P27NkyNwR1cHCAhYVFqa9XbGFFRUUFenp6AIDGjRsjPj4eANC9e3d88803onM9cOBAiW3z58+HkZERAKB3796iYwNA/fr1ARQVQMaPHy9TrEWLFqFTp07Q09NDamqq5N99cnIyOnfuLCqmnZ0dfHx8YG9vX2Lfb7/9JjrX4oawDRo0kFtD2NIal5qbmyMtLQ1dunQR/f56eHjgjz/+gL6+PoKDgzF16lR07NgRCxcuhL+/v+i4sbGx0Pl/7d15WFRl/z/w96AgggIqIpBslgoh4oYLPS6JiYYpUbk8bpSWGl5ZyqPW19
I2lzIfLUvzUcGlMrEHfXJBZdFyNwFTS1NR3EYJExAQRPj8/vDHxMgMwhlGZuD9uq65LjznzIf75p7x3POZcz5306ZwcnLSJG7atWuHbt26KYpXqmPHjli3bp1BMcoqKipC+/btAQDjxo2Dp6cnBg0ahC1btlRLcWAiIiIiU8ZkRR1ijA+qFbG3t0dAQICiS+DnzZunSVa8/fbb8PPzw6pVq/Dtt99iypQpiI2NrXLM3NxchISEALj/4bJt27bo27cv4uPjDZr46/pQXaqgoEBxXADw8PDArl27sGjRIvTs2VNzK48h8fbt2wdXV9dy+9zc3BTFdHJyQlRUFAYOHIj169ejVatWAO5/83vv3j3FbQ0NDS13G0x2djb+/e9/Q6VSKUpWGCsBMmDAAJw7dw7nz59H69atNZezOTs7Y8WKFYpiGmuVldKVO27duqVZuWPYsGEGrdxRWFiI4uJi1KtXDyICDw8PAEDTpk0NSgQaIwmi7zaYDz74wKDbYC5dugQnJydYW1tDRLBs2TIcOnQI/v7+mDJlCurXr/rptqCgAIWFhZor4oKCgrBmzRoMHjxY74ouRERERLUFkxV1iDE+qALGuWKj7AecI0eOIDk5GfXq1cPUqVMVf1DLz8/XqlcxatQoWFpaIigoyKAkwPPPP4/evXvr/FBmyBUrpVQqFaZNm4bg4GDs27fPoFiDBw9GWlqaztdAaSKnqr766iuEh4fjjTfeQJcuXTTj8+eff2LmzJmK27pq1SqsXLkSixYtQseOHQEAXl5eSEpKUhzTGAmQUg4ODmjSpImmjoC7u7smcaNERXUZSpdxVeLzzz/Ha6+9BgsLC2zZsgXz58/H2LFj0ahRI2zcuFFRzBEjRmDYsGGYP38+XnzxRXz88ccYOXIkduzYYdDfwBhJkNLbYHJzc+Ht7Y2TJ0/Cy8sLmZmZ6NOnj+JkxaBBg3Dw4EEA9xNgBw4cQFhYGHbt2oW0tDRFS+OGhYVhz549CA4O1mzr3bs31q1bh/HjxytqJxEREZG5YIFN1J3iVlOmTMFLL72Ef/zjH+X2TZw4EcuXL1cU18LCQu8VG1evXlX0DaCPjw82btwIEcGYMWO0ihc+WMywsl555RWEhYVh0KBBWts3btyIUaNGKf6m0tvbG3FxcfD09Cy3z83NDZcvX1YUV5fqLgRq6tLT07Vug2ndujXS0tIUx4uKisLKlSuxdOlSrQTIhQsXDGrnb7/9hvDwcFy5ckWT+Lt06RLc3NwQFRUFX19fRXHVajW+//57XLx4EfXr14evry/++c9/6q09o1R1FIQ1RuHSOXPm4OTJk5g/fz42b96MwsJCTRIkLi4OW7ZsqXLMsoUpPTw8NLctAfdv40hJSVHUVj8/P5w4cQIA0LlzZ/z888+wsbHBvXv30KlTpwqTujWhrpz3zAHHgoiI6ppKn/uMsBKJ2eGyYYbx9PSUq1ev6tzXsmVLRTE9PDzEy8tLPD09xdPTUy5fviwiIllZWXqXHqwpc+fOlaNHj+rc99FHHymOm5KSIv7+/tKxY0c5efKkPPvss9KwYUNxc3OT48ePK45bVkZGhiQkJMi1a9cMimPMpTBLSkpk4cKF0qNHD3F1dTU43sWLF6Vfv37y/vvvy71798TLy8vgmF27dpVNmzaV2x4TEyMBAQGKYn7//ffi4eEhgwcPFkdHRxk+fLgMGDBAPDw85LffflPc1lu3bil+bmXk5OTIX3/9VW3xFi9eLC1bthRLS0tRqVRiZ2cnEyZMkJs3byqK1717d9m6dausW7dOPDw8NEshJyYmSufOnRW301hLl5aVl5cnycnJkpOTY3AsnvdMB8eCiIjqmsqe+5isEE4UDPXGG2/Izz//rHPfhAkTqvV35eXlyYULFxQ99/z589KnTx/x8vKSt956S+7cuaPZ171792pqYfXp1auXxMbGSlRUlLi7u8vatWtFRCQ2NlaeeeYZRTFHjx6tSSokJCSIo6OjBAQESPPmzSU2NlZRzE8++UScnJzEzc1N1q1bJ25ubvLSSy+Ju7u7LF68WFFMXU6cOCHLli2rlljVnQBp06aNon0Vadeunfz5558icv+1GxoaKiIicXFx0rdvX0UxRUQsLS1l8ODBsmXLFikuLlYcpyxjJ0BEqi8JcuTIEenQoYN06tRJUlNTZfjw4dKgQQNp1qyZJCQkKI67adMm8fHxkf/85z8ya9YsGTRokERFRcmIESMkMjJSUczp06drfk5NTRUXFxdp27atNG/eXPbu3au4rSI875kSjgUREdU1ZpGsmDt3rnTp0kUaNWokzZs3lyFDhsjp06e1jrlz5468/vrr0rRpU7G1tZWwsDDNt1el0tPTNd86N2/eXCIjI6WoqKjS7eBEwby0bt1a0fP69+8vS5culV9++UVGjx4tgYGBmm8oO3TooLg9xkqClG2Tm5ub1j5/f39FMdu3b6/5uVevXpKSkiIiImlpaYr/Bk8++aT89ddfcunSJbGxsZG0tDQREfnzzz/F19dXUUx9qvMbe5HqS4AEBgbK2rVrtT78FxcXS3R0tPTo0UNRzAfHuOwVRd7e3opiitxPnixcuFB8fHzExcVFZsyYIWfOnFEcT8Q4CRB9li9fXu0xMzMzq6XdycnJMmrUKOnUqZO0b99eBg0aJN9++62UlJQoild2zJ977jlNQvHgwYMSGBhoUFt53uMchIiIqKZU9tyn/AblarB3715ERETg0KFD2L17N4qKitC/f3/k5eVpjnnrrbfw448/IiYmBnv37sW1a9cQFham2V9cXIyQkBDcvXsXBw4cwJo1axAdHY333nuvJrpE1eTXX3/V+1BatDIjIwMRERHo3Lkz1q5di5CQEAQFBSE7O9ug1UAmTZqEF198ETExMcjMzERQUJCmjYasBiJlaoA8/fTTevdVxZ07dzQ/5+fno0OHDgDu120oLi5WFLN0KUw3N7dqWwoTAFJTU9GhQwd06tQJp06dQkhICB577DG4u7tX2/3/7dq1w7BhwwyOU/r/TtOmTeHj4wMfHx80bdpUs12J0lVWrl+/joULF1bbKiu2traYNm0afvvtN2zatAmZmZno0qULevXqhbVr1yqK6eXlhV69emHmzJlo2bIlZs6cqWgVoAf973//K/eYPXu25mel8vLyNH/Dv/76C6mpqVCr1Qa3t3Tp0mPHjuH48eP48ccfMWLEiGpZZvTSpUsIDQ0FAHTv3h35+fkGx6zrOAchIiIycY8ic1JZGRkZAkBzeWtWVpZYWlpKTEyM5pjff/9dAMjBgwdFRGT79u1iYWGh9U3HsmXLxM7OTnPP8MPwWw3To1KptGpWlH1YWloqitm2bdty2z799FPp3LmzPPHEE4rb+uAVCR9//LEEBAQYXF+jf//+Ol+T165dk65duyqKOXnyZHnjjTfk9u3bMnPmTFm/fr2UlJTI9u3b5emnn1YU01g1AIxxG4yx64BkZGTIsWPH5NixY5KRkWFQrLNnz8pTTz0ljRo1kj59+kh6erqIiNy4cUNWrlypOK6u12Rubq6sXLlSnnrqKYNj7
t+/X8aNGyeNGzeWnj17ypo1axS3VaVSSWBgoPTp00fzsLa2lj59+ih+va5Zs0asra2lZcuWkpCQIC4uLhIQECDNmjXTvHaV2rFjh6SmpoqISHx8vMyaNcugmI899pgsWbJEFi9eLK1atdLaV/YqKSV43iuPcxAiIqJHwyxuA3nQ2bNnBYCcOHFCRO7fUw+g3P3Q7u7usmjRIhEReffdd8tdLp2WliYAJDk5WefvKSgokOzsbM3j8uXLnCiYGGMU7QwNDZUdO3aU2/7ZZ5+JSqVSFFPEeEkQfbKysuTSpUuKnltYWChvvvmm2NnZiaenp6hUKqlfv74EBwdrbt+oqsOHD+utARAfH68opohxboMxRgKkrPPnz0tSUpIkJSXJ+fPnDY5nDIbc8qSPMRIgIiKrV6+WwMBArf/LPT09FccTuV8I8+LFi3L8+HGxt7fXFMc9e/asQQmAyMhI8fPzEx8fH5k3b574+vrKv/71L+nSpYu8/fbbimKGh4drPUoL4V65ckX69eunuK0i/ICsC+cgREREj4bZJSuKi4slJCREa2L7zTffiJWVVbljAwICNIXHXn31Venfv7/W/ry8PAEg27dv1/m7Zs+eLQDKPThRMB3GKNpZUFAgBQUFOvdduXJFUUwR4yVBKqK0bkepvLw8+fXXXyU5OVkyMzOrqVV/q44aAGU/AIwZM0Zrn9IPlcZIgIiInDp1SgICAsTZ2Vm6du0qXbt2FWdnZwkICJCTJ08qjmuMVVaUrqJREWMkQEpV9+otZdvq4eGhd19V+fj4SGFhofz1119iY2OjKY6am5srTz75pOK4xsJkhTbOQYiIiB4ds6hZUVZERAROnjyJDRs2GP13vf3228jOztY8Ll++bPTfSVWzZMkS/OMf/9C5b/ny5YpiNmjQAA0aNNC577HHHlMUEwA2bNhQrqYEAEydOtWg15Yx6naUsrGxgZ+fHzp27IhmzZoBANq0aaMoVlpaGvr27YtWrVph6tSpKCgoQLNmzWBhYYEePXoobmOLFi2Qk5MD4H5NiFJqtRrW1taKYooR6oAAwMsvv4wZM2ZArVbj8OHDOHz4MNRqNaZPn46XX35ZUcxPP/0UvXv3RpcuXbB+/Xr0798fO3fuxNChQ7FkyRLFbW3atGm5bbdu3VIcDwASEhIMen5FPDw8sGvXLtja2qJnz54oLCw0KJ6FhQVOnTqFffv2IS8vD/v37wcAnD59WnHdFuD+/y9WVlZo0qQJHBwc4OjoCOB+jRArKyvFcQsLC7F582YsXrwYS5cuRVJSkuJYpB/nIERERKanfk03AAAmT56MrVu34qeffkLLli01252dnXH37l1kZWXBwcFBs/3GjRtwdnbWHHPkyBGteDdu3NDs06WiD61EVVXRa8mQJEiHDh3g6emp80P0zZs3FcWsqDCl0gTIpEmT8MILL6B79+5YsmQJgoKCEBcXh8aNGxtUYHTnzp06t9vY2GDTpk2KYpYmQOzs7KotAQIAWVlZeOGFF8ptf/HFF/F///d/imJGR0fj9OnTyM3Nhbe3N06ePAkvLy9kZmaiT58+mDJliqK4qampCA8Ph4WFBdatW4fp06cjKSkJjo6O2Lp1K9q3b1/lmLoSINVJpVJh2rRpCA4Oxr59+wyK9eGHH6JXr16wsLDAhg0bMGvWLKjVaqjVaqxYsUJx3CZNmmDp0qXIzs6Go6MjFixYgLFjxyIuLg62traKYiYlJSE8PBwODg44c+YMevbsia+++gqNGjVCbGysQf+/0N84ByEiIjJRj+IyD31KSkokIiJCXF1d5Y8//ii3v7S41aZNmzTbTp8+rbO4Veml0iIiX3/9tdjZ2em95P9BvByWTJEx6nYYo3CpsQqMVsTQ22AeZEgdEBHjLF1a9m/n7u6utc+Q2xWMXbfjQdU9VtW9fO29e/fkl19+0TqHKHH27FkJDQ2VsLAwSU9PlxkzZkijRo3E399fU3Szqjp27Kg5Nx45ckRGjx4tIiIrVqyQIUOGGNRenvc4ByEiIqopZlGzYtKkSWJvby979uwRtVqteeTn52uOmThxori7u0tiYqL88ssv0qNHD63J/71796Rdu3bSv39/SU1Nlbi4OGnevHmVCppxokCmyBh1O4yRADFWgdHjx4/rfTg7OyuOq48hH6rPnj0rffv2FTs7O/H29pa2bduKnZ2dPP3003LmzBlFMY21yoox6nYYa6yMvXqLqXtwPDp16qT5uU2bNgbF5nmPcxAiIqKaYhbJCugoMAVAoqKiNMfcuXNHXn/9dWnSpInY2NjI888/L2q1WivOxYsXZeDAgdKwYUNxdHSUadOmSVFRUaXbwYkC1RXGSIAYq8CoMa4CMXYCpDqXLjXWKivGKFxqjLESMa+rQEpXkKhOTz31lCQmJoqISExMjAwYMECzj8kKw3EOQkREVDMqe+5TiRhQVa6WyMnJgb29PbKzs2FnZ1fTzSEyK6UFD3Xdg3316lXF99V7eXlh//79cHV1LbfPzc1NUVE6CwsLvXVArl69irt37ypqqzHqQOhy8+ZNNGnSBBYWymsjBwcHIyYmptz/dWq1GqGhoTh8+HCVYxpjrACgY8eOSElJAQC4u7vj0qVLmn0dOnRAampqlWNWVLclODgYarW6yjGB+68tPz8/jBs3DqNGjaqWOh5Hjx5FWFgYMjMz4eLigi1btsDPzw/Xr1/HihUr8N577ymOzfOe6eBYEBFRXVPZc59JFNgkIvNlrAKjgwcPRlpams4PwCEhIYpienh4YN++fXo/VCs1ZcoUzJkzB1lZWXj22Wfx0UcfYdu2bdi8eTMiIyOxa9euKsdMS0vD+PHjcfHiRYSGhmLu3LmalVt69OiBgwcPKmqrMQqXGmOsAOOs3mKMwrUA4Ovri/feew+rVq3CO++8g0GDBmH8+PHo16+f4pgBAQG4fPkybt68qRl74H7hRkMSFURERETmwGSWLiUiKssYy9eWfqjWxZAP1Tk5OQgNDUV4eDhEBKNHjwYAhIaGIiMjQ1HM0lVWYmJikJmZiaCgIM2KLYassqKPvb09goKCFD3XGGMFGGf52tKE1YULF8o9WrRoobitlpaWeOGFF7B9+3b8/vvvaNeuHSZMmABPT0988MEHimJmZWUBgFaigoiIiKiu4JUVRFRnLFmyRO8+Qz5UG+MKgIyMDERERAAA1q5di7lz5yIoKAi7d++GSqVS3FZjLF9bkTZt2uCPP/5Q9FxzugqkLDc3N8yaNQuzZs1CQkICVq9erSiOk5MTBg4ciHHjxmHQoEEG3f5DREREZG6YrCAiMlDpFQB2dnbVdgXAnTt3tP79zjvvwMrKSusKCyWMcRvEo06A2NvbIyAgQFESRFfC6tatW2jSpIlBCSsrKyud24OCghRfseLl5YVevXph5syZmDhxIsaMGYNXXnkFbdq0UdxOIiIiInPBZAURkYEqugIgJiZGUUwfHx/ExcVhwIABmm2RkZGwsLBAZGSkopiAcep2GKsOhDGSIMePH8fYsWN1FkPdtm0b/Pz8FMU9dOiQoudVxNbWFtOmTcO0
adNw4MABrF69Gl26dEGHDh0wfvx4jBkzptp/JxEREZGpYLKCiMhI7O3tYW9vr+i5GzZs0Ll96tSpGDZsmOI2mVPhUmMkQd544w29xVCnTZumqBgqcL8g6rhx45Cenq4piFp6VY0hBVFLBQYGIjAwEEuWLMGGDRuwYsUKJiuIiIioVuMNsEREJqhBgwZ6V1oxZJUVcypcaoximMYohgrcL4j64osvVmtBVF1JGltbW4wbNw779u1T3FYiIiIic8BkBRERGcRYq4EYIwlijGKowN8FUTt37oy1a9ciJCQEQUFByM7OVlwQNSEhQXF7iIiIiMwdbwMhIiKTZIzVW4xRDBUwTkHUpk2b6t1nyCorREREROaAyQoiIqozjFEMFTBOQdRHvcoKERERkSlRiSHXvdYSOTk5sLe3R3Z2Nuzs7Gq6OUREZGYKCwsBQGedkatXryqqM2JhYaG3wOjVq1dx9+7dqjf0/+N5z3RwLIiIqK6p7LmPV1bg7/uUc3JyarglRERkzkqTFmU1btxY0fnFzc0NO3bsgIuLS7l9Tz75pEHnrNLn8vuKmsc5CBER1TWVnYcwWYG/L6c1ZIk9IiKi6ubt7a13n9Jlccu6fft2tcQh5TgHISKiuuph8xDeBgKgpKQE165dQ+PGjRVXbS+Vk5MDNzc3XL58uVZdzlkb+1Ub+wSwX+amNvarNvYJqF39EhHcvn0brq6usLDgwmA1iXOQh2O/zEdt7BPAfpmT2tgnoPb1q7LzEF5Zgfv3Bbds2bJaY9rZ2dWKF9KDamO/amOfAPbL3NTGftXGPgG1p1+8osI0cA5SeeyX+aiNfQLYL3NSG/sE1K5+VWYewq9TiIiIiIiIiMikMFlBRERERERERCaFyYpq1qBBA8yePVvn8nXmrDb2qzb2CWC/zE1t7Fdt7BNQe/tFtUdtfY2yX+ajNvYJYL/MSW3sE1B7+/UwLLBJRERERERERCaFV1YQERERERERkUlhsoKIiIiIiIiITAqTFURERERERERkUpisICIiIiIiIiKTwmSFAl9++SU8PT1hbW2Nbt264ciRIxUeHxMTA29vb1hbW8PPzw/bt29/RC2tnHnz5iEgIACNGzeGk5MTQkNDcebMmQqfEx0dDZVKpfWwtrZ+RC2unDlz5pRro7e3d4XPMfWx8vT0LNcnlUqFiIgInceb6jj99NNPeO655+Dq6gqVSoXNmzdr7RcRvPfee3BxcUHDhg3Rr18/nD179qFxq/rerG4V9auoqAgzZsyAn58fbG1t4erqijFjxuDatWsVxlTyOq5ODxur8PDwcu0bMGDAQ+Oa8lgB0Pk+U6lU+PTTT/XGrOmxorqBcxDTPbeVxTmI6Y4T5yDmMwcBauc8hHOQymOyooq+//57TJ06FbNnz0ZycjL8/f0RHByMjIwMnccfOHAAI0aMwLhx45CSkoLQ0FCEhobi5MmTj7jl+u3duxcRERE4dOgQdu/ejaKiIvTv3x95eXkVPs/Ozg5qtVrzSE9Pf0QtrjxfX1+tNu7bt0/vseYwVkePHtXqz+7duwEAL730kt7nmOI45eXlwd/fH19++aXO/Z988gk+//xzLF++HIcPH4atrS2Cg4NRUFCgN2ZV35vGUFG/8vPzkZycjHfffRfJycn473//izNnzmDw4MEPjVuV13F1e9hYAcCAAQO02vfdd99VGNPUxwqAVn/UajVWr14NlUqFF154ocK4NTlWVPtxDvI3Uzy3PYhzENMcJ85BzGcOAtTOeQjnIFUgVCVdu3aViIgIzb+Li4vF1dVV5s2bp/P4oUOHSkhIiNa2bt26yYQJE4zaTkNkZGQIANm7d6/eY6KiosTe3v7RNUqB2bNni7+/f6WPN8exmjJlijz++ONSUlKic785jBMAiY2N1fy7pKREnJ2d5dNPP9Vsy8rKkgYNGsh3332nN05V35vG9mC/dDly5IgAkPT0dL3HVPV1bEy6+jR27FgZMmRIleKY41gNGTJE+vbtW+ExpjRWVDtxDnKfOZzbOAcxj3HiHMR85iAitXMewjlIxXhlRRXcvXsXx44dQ79+/TTbLCws0K9fPxw8eFDncw4ePKh1PAAEBwfrPd4UZGdnAwCaNm1a4XG5ubnw8PCAm5sbhgwZglOnTj2K5lXJ2bNn4erqilatWmHkyJG4dOmS3mPNbazu3r2L9evX45VXXoFKpdJ7nDmMU1kXLlzA9evXtcbC3t4e3bp10zsWSt6bpiA7OxsqlQoODg4VHleV13FN2LNnD5ycnNC2bVtMmjQJN2/e1HusOY7VjRs3sG3bNowbN+6hx5r6WJH54hxEmzmc2zgHMY9xKotzkPLM4bxWm+chdX0OwmRFFWRmZqK4uBgtWrTQ2t6iRQtcv35d53OuX79epeNrWklJCd5880089dRTaNeund7j2rZti9WrV2PLli1Yv349SkpKEBgYiCtXrjzC1lasW7duiI6ORlxcHJYtW4YLFy6gZ8+euH37ts7jzW2sNm/ejKysLISHh+s9xhzG6UGlf++qjIWS92ZNKygowIwZMzBixAjY2dnpPa6qr+NHbcCAAVi7di0SEhKwYMEC7N27FwMHDkRxcbHO481xrNasWYPGjRsjLCyswuNMfazIvHEO8jdzOLdxDmIe4/QgzkG0mcN5rbbPQ+r6HKR+TTeATEtERAROnjz50HucevTogR49emj+HRgYCB8fH3z99df48MMPjd3MShk4cKDm5/bt26Nbt27w8PDAxo0bK5WdNHWrVq3CwIED4erqqvcYcxinuqioqAhDhw6FiGDZsmUVHmvqr+Phw4drfvbz80P79u3x+OOPY8+ePQgKCqrBllWf1atXY+TIkQ8tDGfqY0Vk6jgHMR+cg5iv2jQHAWr/PKSuz0F4ZUUVODo6ol69erhx44bW9hs3bsDZ2Vnnc5ydnat0fE2aPHkytm7diqSkJLRs2bJKz7W0tETHjh1x7tw5I7XOcA4ODmjTpo3eNprTWKWnpyM+Ph7jx4+v0vPMYZxK/95VGQsl782aUjpJSE9Px+7duyv8RkOXh72Oa1qrVq3g6Oiot33mNFYA8PPPP+PMmTNVfq8Bpj9WZF44B9HPHM5tnIOYxzhxDlIxcziv1aZ5COcgTFZUiZWVFTp37oyEhATNtpKSEiQkJGhljsvq0aOH1vEAsHv3br3H1wQRweTJkxEbG4vExER4eXlVOUZxcTFOnDgBFxcXI7SweuTm5uL8+fN622gOY1UqKioKTk5OCAkJqdLzzGGcvLy84OzsrDUWOTk5OHz4sN6xUPLerAmlk4SzZ88iPj4ezZo1q3KMh72Oa9qVK1dw8+ZNve0zl7EqtWrVKnTu3Bn+/v5Vfq6pjxWZF85B9DOHcxvnIOYxTpyDVMwczmu1aR7COQi4GkhVbdiwQRo0aCDR0dHy22+/yWuvvSYODg5y/fp1EREZPXq0zJw5U3P8/v37pX79+rJw4UL5/fffZfbs2WJpaSknTpyoqS6UM2nSJLG3t5c9e/aIWq3WPPLz8zXHPNiv999/X3bu3Cnnz5+XY8eOyfDhw8X
a2lpOnTpVE13Qadq0abJnzx65cOGC7N+/X/r16yeOjo6SkZEhIuY5ViL3Kxa7u7vLjBkzyu0zl3G6ffu2pKSkSEpKigCQRYsWSUpKiqYi9fz588XBwUG2bNkiv/76qwwZMkS8vLzkzp07mhh9+/aVL774QvPvh703a7pfd+/elcGDB0vLli0lNTVV671WWFiot18Pex3XZJ9u374tkZGRcvDgQblw4YLEx8dLp06dpHXr1lJQUKC3T6Y+VqWys7PFxsZGli1bpjOGqY0V1X6cg9xnque2sjgHMd1x4hzEfOYgD+uXuc5DOAepPCYrFPjiiy/E3d1drKyspGvXrnLo0CHNvt69e8vYsWO1jt+4caO0adNGrKysxNfXV7Zt2/aIW1wxADofUVFRmmMe7Nebb76p+Ru0aNFCnn32WUlOTn70ja/AsGHDxMXFRaysrOSxxx6TYcOGyblz5zT7zXGsRER27twpAOTMmTPl9pnLOCUlJel8zZW2vaSkRN59911p0aKFNGjQQIKCgsr118PDQ2bPnq21raL35qNQUb8uXLig972WlJSkt18Pex3XZJ/y8/Olf//+0rx5c7G0tBQPDw959dVXy53szW2sSn399dfSsGFDycrK0hnD1MaK6gbOQUz33FYW5yCmO06cg5jPHORh/TLXeQjnIJWnEhFRelUGEREREREREVF1Y80KIiIiIiIiIjIpTFYQERERERERkUlhsoKIiIiIiIiITAqTFURERERERERkUpisICIiIiIiIiKTwmQFEREREREREZkUJiuIiIiIiIiIyKQwWUFEj1x4eDhCQ0P17p8zZw46dOjwyNpDREREdQPnIETmg8kKIjI5kZGRSEhIqOlmEBERUR3DOQiR6ahf0w0gIvNx9+5dWFlZGf33NGrUCI0aNTL67yEiIiLzwDkIUd3DKyuISK8+ffpg8uTJePPNN+Ho6Ijg4GAsWrQIfn5+sLW1hZubG15//XXk5uZqnhMdHQ0HBwfs3LkTPj4+aNSoEQYMGAC1Wq339xw9ehTNmzfHggULAJS/BLP0ks2FCxfCxcUFzZo1Q0REBIqKijTHqNVqhISEoGHDhvDy8sK3334LT09PLF68uNr/LkRERGRcnIMQEZMVRFShNWvWwMrKCvv378fy5cthYWGBzz//HKdOncKaNWuQmJiI6dOnaz0nPz8fCxcuxLp16/DTTz/h0qVLiIyM1Bk/MTERzzzzDD7++GPMmDFDbzuSkpJw/vx5JCUlYc2aNYiOjkZ0dLRm/5gxY3Dt2jXs2bMHP/zwA1asWIGMjIxq+RsQERHRo8c5CFHdxttAiKhCrVu3xieffKL5d9u2bTU/e3p64qOPPsLEiRPx1VdfabYXFRVh+fLlePzxxwEAkydPxgcffFAudmxsLMaMGYOVK1di2LBhFbajSZMmWLp0KerVqwdvb2+EhIQgISEBr776Kk6fPo34+HgcPXoUXbp0AQCsXLkSrVu3NqjvREREVHM4ByGq25isIKIKde7cWevf8fHxmDdvHk6fPo2cnBzcu3cPBQUFyM/Ph42NDQDAxsZGM0kAABcXl3LfMBw+fBhbt27Fpk2bKqzKXcrX1xf16tXTinnixAkAwJkzZ1C/fn106tRJs/+JJ55AkyZNqtxfIiIiMg2cgxDVbbwNhIgqZGtrq/n54sWLGDRoENq3b48ffvgBx44dw5dffgngfuGrUpaWlloxVCoVRERr2+OPPw5vb2+sXr1a675PfXTFLCkpqXJ/iIiIyDxwDkJUtzFZQUSVduzYMZSUlOCzzz5D9+7d0aZNG1y7dk1RLEdHRyQmJuLcuXMYOnRopSYL+rRt2xb37t1DSkqKZtu5c+dw69YtxTGJiIjIdHAOQlT3MFlBRJX2xBNPoKioCF988QXS0tKwbt06LF++XHE8JycnJCYm4vTp0xgxYgTu3bunKI63tzf69euH1157DUeOHEFKSgpee+01NGzYECqVSnH7iIiIyDRwDkJU9zBZQUSV5u/vj0WLFmHBggVo164dvvnmG8ybN8+gmM7OzkhMTMSJEycwcuRIFBcXK4qzdu1atGjRAr169cLzzz+PV199FY0bN4a1tbVB7SMiIqKaxzkIUd2jkgdv4iIiqgWuXLkCNzc3xMfHIygoqKabQ0RERHUE5yBE1YPJCiKqFRITE5Gbmws/Pz+o1WpMnz4dV69exR9//FGuMBYRERFRdeEchMg4uHQpEdUKRUVFeOedd5CWlobGjRsjMDAQ33zzDScJREREZFScgxAZB6+sICIiIiIiIiKTwgKbRERERERERGRSmKwgIiIiIiIiIpPCZAURERERERERmRQmK4iIiIiIiIjIpDBZQUREREREREQmhckKIiIiIiIiIjIpTFYQERERERERkUlhsoKIiIiIiIiITAqTFURERERERERkUv4frJL0ZU2EhckAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import scanpy\n", + "\n", + "# ScanPy `rank_genes_groups` assumes that the X data is logged.\n", + "scanpy.pp.log1p(adata.X, copy=False)\n", + "\n", + "# do ranking\n", + "scanpy.tl.rank_genes_groups(adata, \"cell_type_ontology_term_id\", method=\"t-test\")\n", + "\n", + "# visualize ranking\n", + "scanpy.pl.rank_genes_groups(adata)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.5 ('venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "3da8ec1c162cd849e59e6ea2824b2e353dce799884e910aae99411be5277f953" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/api/python/notebooks/census_summary_cell_counts.ipynb b/api/python/notebooks/census_summary_cell_counts.ipynb new file mode 100644 index 000000000..f651a5347 --- /dev/null +++ b/api/python/notebooks/census_summary_cell_counts.ipynb @@ -0,0 +1,385 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Census summary cell counts example\n", + "\n", + "*Goal:* demonstrate basic use of the `census_summary_cell_counts` dataframe.\n", + "\n", + "Each Cell Census contains a top-level dataframe summarizing counts of various cell labels. You can read this into a Pandas DataFrame:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
organismcategoryontology_term_idunique_cell_counttotal_cell_countlabel
0Homo sapiensallna2204498034115852na
1Homo sapiensassayEFO:0008722177719260396Drop-seq
2Homo sapiensassayEFO:0008780051304inDrop
3Homo sapiensassayEFO:0008913133511133511single-cell RNA sequencing
4Homo sapiensassayEFO:000891944721161998Seq-Well
.....................
1147Mus musculustissue_generalUBERON:0002113164881188361kidney
1148Mus musculustissue_generalUBERON:00023651557731154exocrine gland
1149Mus musculustissue_generalUBERON:000236737715130135prostate gland
1150Mus musculustissue_generalUBERON:00023681332226644endocrine gland
1151Mus musculustissue_generalUBERON:000237154737109474bone marrow
\n", + "

1152 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " organism category ontology_term_id unique_cell_count \\\n", + "0 Homo sapiens all na 22044980 \n", + "1 Homo sapiens assay EFO:0008722 177719 \n", + "2 Homo sapiens assay EFO:0008780 0 \n", + "3 Homo sapiens assay EFO:0008913 133511 \n", + "4 Homo sapiens assay EFO:0008919 44721 \n", + "... ... ... ... ... \n", + "1147 Mus musculus tissue_general UBERON:0002113 164881 \n", + "1148 Mus musculus tissue_general UBERON:0002365 15577 \n", + "1149 Mus musculus tissue_general UBERON:0002367 37715 \n", + "1150 Mus musculus tissue_general UBERON:0002368 13322 \n", + "1151 Mus musculus tissue_general UBERON:0002371 54737 \n", + "\n", + " total_cell_count label \n", + "0 34115852 na \n", + "1 260396 Drop-seq \n", + "2 51304 inDrop \n", + "3 133511 single-cell RNA sequencing \n", + "4 161998 Seq-Well \n", + "... ... ... \n", + "1147 188361 kidney \n", + "1148 31154 exocrine gland \n", + "1149 130135 prostate gland \n", + "1150 26644 endocrine gland \n", + "1151 109474 bone marrow \n", + "\n", + "[1152 rows x 6 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import cell_census\n", + "\n", + "census = cell_census.open_soma()\n", + "census_summary_cell_counts = census[\"census_info\"][\"summary_cell_counts\"].read_as_pandas_all()\n", + "\n", + "# Dropping the soma_joinid column as it isn't useful in this demo\n", + "census_summary_cell_counts = census_summary_cell_counts.drop(columns=[\"soma_joinid\"])\n", + "\n", + "census_summary_cell_counts" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This dataframe is precomputed from the experiments in the Cell Census, and is intended to simplify quick looks at the Census contents.\n", + "\n", + "You can do similar group statistics using Pandas `groupby` functions. \n", + "\n", + "The code below reproduces the above counts using full `obs` dataframe in the `Homo_sapiens` experiment.\n", + "\n", + "Keep in mind that the Cell Census is very large, and any queries will return significant amount of data. You can manage that by narrowing the query request using `column_names` and `value_filter` in your query." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cell_type_ontology_term_idcell_typesize
0CL:0000001primary cultured cell80
1CL:0000003native cell611233
2CL:0000006neuronal receptor cell2502
3CL:0000019sperm11
4CL:0000031neuroblast (sensu Vertebrata)2355
............
540CL:4023041L5 extratelencephalic projecting glutamatergic...2361
541CL:4023051vascular leptomeningeal cell3937
542CL:4023070caudal ganglionic eminence derived GABAergic c...8463
543CL:4028002alveolar capillary type 1 endothelial cell16048
544CL:4028003alveolar capillary type 2 endothelial cell7157
\n", + "

545 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " cell_type_ontology_term_id \\\n", + "0 CL:0000001 \n", + "1 CL:0000003 \n", + "2 CL:0000006 \n", + "3 CL:0000019 \n", + "4 CL:0000031 \n", + ".. ... \n", + "540 CL:4023041 \n", + "541 CL:4023051 \n", + "542 CL:4023070 \n", + "543 CL:4028002 \n", + "544 CL:4028003 \n", + "\n", + " cell_type size \n", + "0 primary cultured cell 80 \n", + "1 native cell 611233 \n", + "2 neuronal receptor cell 2502 \n", + "3 sperm 11 \n", + "4 neuroblast (sensu Vertebrata) 2355 \n", + ".. ... ... \n", + "540 L5 extratelencephalic projecting glutamatergic... 2361 \n", + "541 vascular leptomeningeal cell 3937 \n", + "542 caudal ganglionic eminence derived GABAergic c... 8463 \n", + "543 alveolar capillary type 1 endothelial cell 16048 \n", + "544 alveolar capillary type 2 endothelial cell 7157 \n", + "\n", + "[545 rows x 3 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "human = census[\"census_data\"][\"homo_sapiens\"]\n", + "obs_df = human.obs.read_as_pandas_all(column_names=[\"cell_type_ontology_term_id\", \"cell_type\"])\n", + "obs_df.groupby(by=[\"cell_type_ontology_term_id\", \"cell_type\"], as_index=False, observed=True).size()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.5 ('venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "3da8ec1c162cd849e59e6ea2824b2e353dce799884e910aae99411be5277f953" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/cell_census_builder/REAMDE.md b/cell_census_builder/REAMDE.md new file mode 100644 index 000000000..a32e8f987 --- /dev/null +++ b/cell_census_builder/REAMDE.md @@ -0,0 +1,61 @@ +# README + +This is a tool to build the SOMA instantiation of the Cell Census schema, as specified in this doc: + +https://docs.google.com/document/d/1GKndzCk9q_1SdYOq3BeCxWgp-o2NSQkEmSBaBPKnNI8/ + +CAVEATS (READ THIS): + +1. The code is written to the still-rapidly-evolving and **pre-release** Python SOMA API, _and will be subject to change_ as the SOMA API and `tiledbsoma` evolve and stabilize. +2. The schema implemented by this code is still evolving and subject to change. + +## Usage + +TL;DR: + +- given a set of H5AD files, which comply with cellxgene 3.0 schema, +- create several SOMAExperiment aggregations representing mouse & human slices of the entire collection, and +- embed experiments into a single SOMACollection, along with other metadata about the aggregation/census + +The build process: + +- Pass 1: stage all source H5AD files +- Pass 2: build the axis dataframes for each experiment. This is a single-threaded pass, building dense dataframes. +- Pass 3: build the X layers for each experiment. This is a concurrent pass, reading/writing X layers in parallel. +- Pass 4: optional, validate the above + +Modes of operation: +a) (default) creating the entire "cell census" using all files currently in the CELLxGENE repository. 
+b) creating a smaller "cell census" from a user-provided list of files (a "manifest") + +### Mode (a) - creating the full cell census from the entire CELLxGENE (public) corpus: + +- On a large-memory machine with _ample_ free (local) disk (eg, 3/4 TB or more) and swap (1 TB or more) +- To create an cell census at ``, execute: + > $ python -m cell_census_builder -mp --max-workers 12 + +If you run out of memory, reduce `--max-workers`. You can also try a higher number if you have lots of CPU & memory. + +### Mode (b) - creating a cell census from a user-provided list of H5AD files: + +- Create a manifest file, in CSV format, containing two columns: dataset_id, h5ad_uri. Example: + ```csv + 53d208b0-2cfd-4366-9866-c3c6114081bc, /files/53d208b0-2cfd-4366-9866-c3c6114081bc.h5ad + 559ed814-a9c9-4b77-a0e6-7da7b907fe3a, /files/559ed814-a9c9-4b77-a0e6-7da7b907fe3a.h5ad + 5b93b8fc-7c9a-45bd-ad3f-dc883137de30, /files/5b93b8fc-7c9a-45bd-ad3f-dc883137de30.h5ad + ``` + You can specify a file system path or a URI in the second field +- To create an cell census at ``, execute: + > $ python -m cell_census_builder --manifest + +### Other info + +There are more options discoverable via the `--help` command line option. + +Note on required host resources: + +- all H5AD files not on the local disk will be downloaded/cached locally. There must be + suffiicent local file system space. Location of cache can be controlled with the + environment variable `FSSPEC_CACHE_DIR` +- each H5AD will be read into memory, in its entirety. Sufficient RAM must be present to + allow for this (and to do so for multiple H5ADs concurrently if you use the `--multi-process` option) diff --git a/cell_census_builder/__init__.py b/cell_census_builder/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cell_census_builder/__main__.py b/cell_census_builder/__main__.py new file mode 100644 index 000000000..9c040d40b --- /dev/null +++ b/cell_census_builder/__main__.py @@ -0,0 +1,315 @@ +import argparse +import gc +import logging +import multiprocessing +import os.path +import sys +from datetime import datetime, timezone +from typing import List, Tuple + +import tiledbsoma as soma + +from .anndata import open_anndata +from .census_summary import create_census_summary +from .consolidate import consolidate +from .datasets import Dataset, assign_soma_joinids, create_dataset_manifest +from .experiment_builder import ExperimentBuilder, populate_X_layers +from .globals import CENSUS_SCHEMA_VERSION, CXG_SCHEMA_VERSION, RNA_SEQ, TileDB_Ctx +from .manifest import load_manifest +from .mp import process_initializer +from .source_assets import stage_source_assets +from .summary_cell_counts import create_census_summary_cell_counts +from .util import uricat +from .validate import validate + + +def make_experiment_builders(base_uri: str, args: argparse.Namespace) -> List[ExperimentBuilder]: + """ + Define all soma.Experiments to build in the census. + + Functionally, this defines per-experiment name, anndata filter, etc. + It also loads any required per-Experiment assets. 
+ """ + GENE_LENGTH_BASE_URI = ( + "https://raw.githubusercontent.com/chanzuckerberg/single-cell-curation/" + "100f935eac932e1f5f5dadac0627204da3790f6f/cellxgene_schema_cli/cellxgene_schema/ontology_files/" + ) + GENE_LENGTH_URIS = [ + GENE_LENGTH_BASE_URI + "genes_homo_sapiens.csv.gz", + GENE_LENGTH_BASE_URI + "genes_mus_musculus.csv.gz", + GENE_LENGTH_BASE_URI + "genes_sars_cov_2.csv.gz", + ] + experiment_builders = [ # The soma.Experiments we want to build + ExperimentBuilder( + base_uri=base_uri, + name="homo_sapiens", + anndata_cell_filter_spec=dict(organism_ontology_term_id="NCBITaxon:9606", assay_ontology_term_ids=RNA_SEQ), + gene_feature_length_uris=GENE_LENGTH_URIS, + ), + ExperimentBuilder( + base_uri=base_uri, + name="mus_musculus", + anndata_cell_filter_spec=dict(organism_ontology_term_id="NCBITaxon:10090", assay_ontology_term_ids=RNA_SEQ), + gene_feature_length_uris=GENE_LENGTH_URIS, + ), + ] + + return experiment_builders + + +def main() -> int: + parser = create_args_parser() + args = parser.parse_args() + assert args.subcommand in ["build", "validate"] + + process_initializer(args.verbose) + + # normalize our base URI - must include trailing slash + args.uri = args.uri if args.uri.endswith("/") else args.uri + "/" + soma_path = uricat(args.uri, args.build_tag, "soma") + assets_path = uricat(args.uri, args.build_tag, "h5ads") + + # create the experiment builders + experiment_builders = make_experiment_builders(uricat(soma_path, "census_data"), args) + + cc = 0 + if args.subcommand == "build": + cc = build(args, soma_path, assets_path, experiment_builders) + + # sanity check for build completion + assert cc != 0 or all(e.is_finished() for e in experiment_builders) + + if cc == 0 and (args.subcommand == "validate" or args.validate): + cc = validate(args, experiment_builders) + + return cc + + +def build( + args: argparse.Namespace, soma_path: str, assets_path: str, experiment_builders: List[ExperimentBuilder] +) -> int: + """ + Approximately, build steps are: + 1. Download manifest and copy/stage all source assets + 2. Read all H5AD and create axis dataframe (serial) + * write obs/var dataframes + * accumulate overall shape of X + 3. Read all H5AD assets again, write X layer (parallel) + 4. 
Optional: validate + + Returns + ------- + int + Process completion code, 0 on success, non-zero indicating error, + suitable for providing to sys.exit() + """ + + # Don't clobber an existing census build + if os.path.exists(soma_path) or os.path.exists(assets_path): + logging.error("Census build path already exists - aborting build") + return 1 + + # Create top-level build directories + os.makedirs(soma_path, exist_ok=False) + os.makedirs(assets_path, exist_ok=False) + + # Step 1 - get all source assets + datasets = build_step1_get_source_assets(args, assets_path) + + # Step 2 - build axis dataframes + top_level_collection, filtered_datasets = build_step2_create_axis( + soma_path, assets_path, datasets, experiment_builders, args + ) + assign_soma_joinids(filtered_datasets) + logging.info(f"({len(filtered_datasets)} of {len(datasets)}) suitable for processing.") + gc.collect() + + # Step 3- create X layers + build_step3_create_X_layers(assets_path, filtered_datasets, experiment_builders, args) + gc.collect() + + # Write out dataset manifest and summary information + create_dataset_manifest(top_level_collection["census_info"], filtered_datasets) + create_census_summary_cell_counts( + top_level_collection["census_info"], [e.census_summary_cell_counts for e in experiment_builders] + ) + create_census_summary(top_level_collection["census_info"], experiment_builders, args.build_tag) + + if args.consolidate: + consolidate(top_level_collection.uri) + + return 0 + + +def create_top_level_collections(soma_path: str) -> soma.Collection: + """ + Create the top-level SOMA collections for the Census. + + Returns the top-most collection. + """ + top_level_collection = soma.Collection(soma_path, ctx=TileDB_Ctx()) + if top_level_collection.exists(): + logging.error("Census already exists - aborting") + raise Exception("Census already exists - aborting") + + top_level_collection.create() + # Set top-level metadata for the experiment + top_level_collection.metadata["created_on"] = datetime.now(tz=timezone.utc).isoformat(timespec="seconds") + top_level_collection.metadata["cxg_schema_version"] = CXG_SCHEMA_VERSION + top_level_collection.metadata["census_schema_version"] = CENSUS_SCHEMA_VERSION + + # Create sub-collections for experiments, etc. 
+ for n in ["census_info", "census_data"]: + cltn = soma.Collection(uricat(top_level_collection.uri, n), ctx=TileDB_Ctx()).create() + top_level_collection.set(n, cltn, relative=True) + + return top_level_collection + + +def build_step1_get_source_assets(args: argparse.Namespace, assets_path: str) -> List[Dataset]: + logging.info("Build step 1 - get source assets - started") + + # Load manifest defining the datasets + datasets = load_manifest(args.manifest) + if len(datasets) == 0: + logging.error("No H5AD files in the manifest (or we can't find the files)") + raise AssertionError("No H5AD files in the manifest (or we can't find the files)") + + # Testing/debugging hook - hidden option + if args.test_first_n is not None and args.test_first_n > 0: + # Process the N smallest datasets + datasets = sorted(datasets, key=lambda d: d.asset_h5ad_filesize)[0 : args.test_first_n] + + # Stage all files + stage_source_assets(datasets, args, assets_path) + + logging.info("Build step 1 - get source assets - finished") + return datasets + + +def build_step2_create_axis( + soma_path: str, + assets_path: str, + datasets: List[Dataset], + experiment_builders: List[ExperimentBuilder], + args: argparse.Namespace, +) -> Tuple[soma.Collection, List[Dataset]]: + """ + Create all objects, and populate the axis dataframes. + + Returns: the filtered datasets that will be included. This is simply + an optimization to allow subsequent X matrix writing to skip unused + datasets. + """ + logging.info("Build step 2 - axis creation - started") + + top_level_collection = create_top_level_collections(soma_path) + + # Create axis + for e in experiment_builders: + e.create(data_collection=top_level_collection["census_data"]) + assert soma.Experiment(e.se_uri).exists() + + # Write obs axis and accumulate var axis (and remember the datasets that pass our filter) + filtered_datasets = [] + N = len(datasets) * len(experiment_builders) + n = 1 + for (dataset, ad) in open_anndata(assets_path, datasets, backed="r"): + dataset_total_cell_count = 0 + for e in experiment_builders: + dataset_total_cell_count += e.accumulate_axes(dataset, ad, progress=(n, N)) + n += 1 + + dataset.dataset_total_cell_count = dataset_total_cell_count + if dataset_total_cell_count > 0: + filtered_datasets.append(dataset) + + # Commit / write var + for e in experiment_builders: + e.commit_axis() + logging.info(f"Experiment {e.name} will contain {e.n_obs} cells from {e.n_datasets} datasets") + + logging.info("Build step 2 - axis creation - finished") + return top_level_collection, filtered_datasets + + +def build_step3_create_X_layers( + assets_path: str, + filtered_datasets: List[Dataset], + experiment_builders: List[ExperimentBuilder], + args: argparse.Namespace, +) -> None: + """ + Create and populate all X layers + """ + logging.info("Build step 3 - X layer creation - started") + # base_path = args.uri + + # Create X layers + for e in experiment_builders: + e.create_X_layers(filtered_datasets) + e.create_joinid_metadata() + + # Process all X data + populate_X_layers(assets_path, filtered_datasets, experiment_builders, args) + + # tidy up and finish + for e in experiment_builders: + e.commit_X(consolidate=args.consolidate) + e.commit_presence_matrix(filtered_datasets) + + logging.info("Build step 3 - X layer creation - finished") + + +def create_args_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(prog="cell_census_builder") + parser.add_argument("uri", type=str, help="Census top-level URI") + parser.add_argument("-v", "--verbose", 
action="count", default=0, help="Increase logging verbosity") + parser.add_argument( + "-mp", + "--multi-process", + action=argparse.BooleanOptionalAction, + default=False, + help="Use multiple processes", + ) + parser.add_argument("--max-workers", type=int, help="Concurrency") + parser.add_argument( + "--build-tag", + type=str, + default=datetime.now().astimezone().date().isoformat(), + help="Census build tag (default: current date is ISO8601 format)", + ) + + subparsers = parser.add_subparsers(required=True, dest="subcommand") + + # BUILD + build_parser = subparsers.add_parser("build", help="Build Cell Census") + build_parser.add_argument( + "--manifest", + type=argparse.FileType("r"), + help="Manifest file", + ) + build_parser.add_argument( + "--validate", action=argparse.BooleanOptionalAction, default=True, help="Validate immediately after build" + ) + build_parser.add_argument( + "--consolidate", + action=argparse.BooleanOptionalAction, + default=True, + help="Consolidate TileDB objects after build", + ) + # hidden option for testing. Will process only the first 'n' datasets + build_parser.add_argument("--test-first-n", type=int, help=argparse.SUPPRESS) + + # VALIDATE + subparsers.add_parser("validate", help="Validate an existing cell census build") + + return parser + + +if __name__ == "__main__": + # this is very important to do early, before any use of `concurrent.futures` + if multiprocessing.get_start_method(True) != "spawn": + multiprocessing.set_start_method("spawn", True) + + sys.exit(main()) diff --git a/cell_census_builder/anndata.py b/cell_census_builder/anndata.py new file mode 100644 index 000000000..835d486f7 --- /dev/null +++ b/cell_census_builder/anndata.py @@ -0,0 +1,164 @@ +import logging +from typing import Any, Iterator, List, Optional, Protocol, TypedDict, Union + +import anndata +import numpy as np +import pandas as pd + +from .datasets import Dataset +from .globals import CXG_SCHEMA_VERSION, CXG_SCHEMA_VERSION_IMPORT, FEATURE_REFERENCE_IGNORE, RNA_SEQ +from .util import uricat + +AnnDataFilterSpec = TypedDict( + "AnnDataFilterSpec", + { + "organism_ontology_term_id": Optional[str], + "assay_ontology_term_ids": Optional[List[str]], + }, +) + + +def open_anndata( + base_path: str, datasets: Union[List[Dataset], Dataset], *args: Any, **kwargs: Any +) -> Iterator[tuple[Dataset, anndata.AnnData]]: + """ + Generator to open anndata in a given mode, and filter out those H5ADs which do not match our base + criteria for inclusion in the census. + + Will localize non-local (eg s3) URIs to accomadate AnnData/H5PY requirement for a local file. + + Apply criteria to filter out H5ADs we don't want or can't process. Also apply a set of normalization + remainder of code expects, such as final/raw feature equivalence. + """ + if not isinstance(datasets, list): + datasets = [datasets] + + for h5ad in datasets: + path = uricat(base_path, h5ad.dataset_h5ad_path) + logging.debug(f"open_anndata: {path}") + ad = anndata.read_h5ad(path, *args, **kwargs) + + assert CXG_SCHEMA_VERSION == "3.0.0" + if h5ad.schema_version == "": + h5ad.schema_version = get_cellxgene_schema_version(ad) + if h5ad.schema_version not in CXG_SCHEMA_VERSION_IMPORT: + logging.error(f"H5AD has old schema version, skipping {h5ad.dataset_h5ad_path}") + continue + + # Multi-organism datasets - any dataset with 2+ feature_reference organisms is ignored, + # exclusive of values in FEATURE_REFERENCE_IGNORE. See also, cell filter for mismatched + # cell/feature organism values. 
+ feature_reference_organisms = set(ad.var.feature_reference.unique()) - FEATURE_REFERENCE_IGNORE + if len(feature_reference_organisms) > 1: + logging.info(f"H5AD ignored due to multi-organism feature_reference: {h5ad.dataset_id}") + continue + + # shape of raw and final must be same shape. Schema 2.0 disallows cell filtering, + # but DOES allow feature/gene filtering. The "census" specification requires that + # any filtered features be added back to the final layer. + if ad.raw is not None: + missing_from_var = ad.raw.var.index.difference(ad.var.index) + if len(missing_from_var) > 0: + raw_var = ad.raw.var.loc[missing_from_var].copy() + raw_var["feature_is_filtered"] = True + # TODO - these should be looked up in the ontology + raw_var["feature_name"] = "unknown" + raw_var["feature_reference"] = "unknown" + new_var = pd.concat([ad.var, raw_var]) + if ad.isbacked: + ad = ad.to_memory() + ad.X.resize(ad.n_obs, len(new_var)) + ad = anndata.AnnData(X=ad.X, obs=ad.obs, var=new_var, raw=ad.raw, dtype=np.float32) + + # sanity checks & expectations for any AnnData we can handle + if ad.raw is not None: + assert ad.X.shape == ad.raw.X.shape + assert len(ad.raw.var) == len(ad.var) + assert len(ad.raw.var.index.difference(ad.var.index)) == 0 + assert len(ad.var.index.difference(ad.raw.var.index)) == 0 + assert ad.X.shape == (len(ad.obs), len(ad.var)) + + # TODO: In principle, we could look up missing feature_name, but for now, just assert they exist + assert ((ad.var.feature_name != "") & (ad.var.feature_name != None)).all() # noqa: E711 + + yield (h5ad, ad) + + +class AnnDataFilterFunction(Protocol): + def __call__(self, ad: anndata.AnnData, retain_X: Optional[bool] = True) -> anndata.AnnData: + ... + + +def make_anndata_cell_filter(filter_spec: AnnDataFilterSpec) -> AnnDataFilterFunction: + """ + Return an anndata sliced/filtered for those cells/genes of interest. + + obs filter: + * not organoid or cell culture + * Caller-specified assays only + * Caller-specified taxa (obs.organism_ontology_term_id == '') + * Organism term ID value not equal to gene feature_reference value + + var filter: + * genes only (var.feature_biotype == 'gene') + """ + organism_ontology_term_id = filter_spec.get("organism_ontology_term_id", None) + assay_ontology_term_ids = filter_spec.get("assay_ontology_term_ids", None) + + def _filter(ad: anndata.AnnData, retain_X: Optional[bool] = True) -> anndata.AnnData: + obs_mask = ~( # noqa: E712 + ad.obs.tissue_ontology_term_id.str.endswith(" (organoid)") + | ad.obs.tissue_ontology_term_id.str.endswith(" (cell culture)") + ) + + if organism_ontology_term_id is not None: + obs_mask = obs_mask & (ad.obs.organism_ontology_term_id == organism_ontology_term_id) + if assay_ontology_term_ids is not None: + obs_mask = obs_mask & ad.obs.assay_ontology_term_id.isin(RNA_SEQ) + + # multi-organism dataset cell filter - exclude any cells where organism != feature_reference + feature_references = set(ad.var.feature_reference.unique()) - FEATURE_REFERENCE_IGNORE + assert len(feature_references) == 1 # else there is a bug in open_anndata + feature_reference_organism_ontology_id = feature_references.pop() + obs_mask = obs_mask & (ad.obs.organism_ontology_term_id == feature_reference_organism_ontology_id) + + # This does NOT slice raw on the var axis. 
+ # See https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.raw.html + ad = ad[obs_mask, (ad.var.feature_biotype == "gene")] + + obs = ad.obs + var = ad.var + var.index.rename("feature_id", inplace=True) + X = ad.X if retain_X else None + raw = ad.raw if retain_X and ad.n_obs > 0 else None + + if raw: + # remove non-gene features + mask = ad.raw.var.feature_biotype == "gene" + raw = anndata.AnnData(X=ad.raw.X[:, mask], obs=ad.obs, var=ad.raw.var[mask], dtype=np.float32) + + # sanity checks + if raw is not None: + assert ad.var.index.difference(raw.var.index).empty + assert raw.var.index.difference(ad.var.index).empty + assert ad.X.shape == raw.X.shape + + # this dumps all other ancillary state, eg, obsm/varm/.... + ad = anndata.AnnData(X=X, obs=obs, var=var, raw=raw, dtype=np.float32) + return ad + + return _filter + + +def get_cellxgene_schema_version(ad: anndata.AnnData) -> str: + + # cellxgene >=2.0 + if "schema_version" in ad.uns: + # not sure why this is a nested array + return str(ad.uns["schema_version"]) + + # cellxgene 1.X + if "version" in ad.uns: + return str(ad.uns["version"]["corpora_schema_version"]) + + return "" diff --git a/cell_census_builder/census_summary.py b/cell_census_builder/census_summary.py new file mode 100644 index 000000000..ce74f7aa8 --- /dev/null +++ b/cell_census_builder/census_summary.py @@ -0,0 +1,40 @@ +import logging +from typing import Sequence + +import pandas as pd +import pyarrow as pa +import tiledbsoma as soma + +from .experiment_builder import ExperimentBuilder, get_summary_stats +from .globals import CENSUS_SCHEMA_VERSION, CENSUS_SUMMARY_NAME, TileDB_Ctx +from .util import pandas_dataframe_strings_to_ascii_issue_247_workaround, uricat + + +def create_census_summary( + info_collection: soma.Collection, experiment_builders: Sequence[ExperimentBuilder], build_tag: str +) -> None: + logging.info("Creating census summary") + + summary_stats = get_summary_stats(experiment_builders) + data = [ + ("cell_census_schema_version", CENSUS_SCHEMA_VERSION), + ("cell_census_build_date", build_tag), + ("total_cell_count", str(summary_stats["total_cell_count"])), + ("unique_cell_count", str(summary_stats["unique_cell_count"])), + ("number_donors_homo_sapiens", str(summary_stats["number_donors"]["homo_sapiens"])), + ("number_donors_mus_musculus", str(summary_stats["number_donors"]["mus_musculus"])), + ] + + df = pd.DataFrame.from_records(data, columns=["label", "value"]) + df["soma_joinid"] = range(len(df)) + + # TODO: work-around for TileDB-SOMA#274. Remove when fixed. + df = pandas_dataframe_strings_to_ascii_issue_247_workaround(df) + + # write to a SOMA dataframe + summary_uri = uricat(info_collection.uri, CENSUS_SUMMARY_NAME) + summary = soma.DataFrame(summary_uri, ctx=TileDB_Ctx()) + summary.create(pa.Schema.from_pandas(df, preserve_index=False), index_column_names=["soma_joinid"]) + for batch in pa.Table.from_pandas(df, preserve_index=False).to_batches(): + summary.write(batch) + info_collection.set(CENSUS_SUMMARY_NAME, summary, relative=True) diff --git a/cell_census_builder/consolidate.py b/cell_census_builder/consolidate.py new file mode 100644 index 000000000..099f08dfc --- /dev/null +++ b/cell_census_builder/consolidate.py @@ -0,0 +1,37 @@ +import logging + +import tiledbsoma as soma + +if soma.get_storage_engine() == "tiledb": + import tiledb + + +def consolidate(uri: str) -> None: + """ + This is a non-portable, TileDB-specific consolidation routine. 
+ """ + if soma.get_storage_engine() != "tiledb": + return + + census = soma.Collection(uri) + if not census.exists(): + return + + consolidate_collection(census) + + +def consolidate_collection(collection: soma.Collection) -> None: + for soma_obj in collection.values(): + type = soma_obj.soma_type + if type in ["SOMADataFrame", "SOMASparseNdArray", "SOMADenseNdArray"]: + logging.info(f"Consolidating {type} {soma_obj.uri}") + consolidate_tiledb_object(soma_obj.uri) + elif type in ["SOMACollection", "SOMAExperiment", "SOMAMeasurement"]: + consolidate_collection(soma_obj) + else: + raise TypeError(f"Unknown SOMA type {type}.") + + +def consolidate_tiledb_object(uri: str) -> None: + tiledb.consolidate(uri, config=tiledb.Config({"sm.consolidation.buffer_size": 1 * 1024**3})) + tiledb.vacuum(uri) diff --git a/cell_census_builder/datasets.py b/cell_census_builder/datasets.py new file mode 100644 index 000000000..6e2217dcb --- /dev/null +++ b/cell_census_builder/datasets.py @@ -0,0 +1,83 @@ +import dataclasses +import logging +from typing import List, Type, TypeVar + +import pandas as pd +import pyarrow as pa +import tiledbsoma as soma + +from .globals import CENSUS_DATASETS_COLUMNS, CENSUS_DATASETS_NAME, TileDB_Ctx +from .util import pandas_dataframe_strings_to_ascii_issue_247_workaround, uricat + +T = TypeVar("T", bound="Dataset") + + +@dataclasses.dataclass +class Dataset: + """ + Type used to handle source H5AD datasets read from manifest + """ + + # Required + dataset_id: str # CELLxGENE dataset_id + corpora_asset_h5ad_uri: str # the URI from which we originally read the H5AD asset + dataset_h5ad_path: str = "" # set after staging, required by end of process + + # Optional + dataset_title: str = "" # CELLxGENE dataset title + collection_id: str = "" # CELlxGENE collection id + collection_name: str = "" # CELlxGENE collection name + collection_doi: str = "" # CELLxGENE collection doi + asset_h5ad_filesize: int = -1 + + # Optional, inferred from data if not already known + schema_version: str = "" # empty string if version unknown + dataset_total_cell_count: int = 0 # number of cells in the census by dataset + + # Assigned late in the game, only to datasets we incorporate into the census + soma_joinid: int = -1 + + def __post_init__(self) -> None: + """ + Type contracts - downstream code assume these types, so enforce it. + """ + for f in dataclasses.fields(self): + assert isinstance( + getattr(self, f.name), f.type + ), f"{f.name} has incorrect type, expected {f.type}, got {type(getattr(self,f.name))}" + + @classmethod + def to_dataframe(cls: Type[T], datasets: List[T]) -> pd.DataFrame: + if len(datasets) == 0: + return pd.DataFrame({field.name: pd.Series(dtype=field.type) for field in dataclasses.fields(cls)}) + + return pd.DataFrame(datasets) + + @classmethod + def from_dataframe(cls: Type[T], datasets: pd.DataFrame) -> List["Dataset"]: + return [Dataset(**r) for r in datasets.to_dict("records")] + + +def assign_soma_joinids(datasets: List[Dataset]) -> None: + for joinid, dataset in enumerate(datasets): + dataset.soma_joinid = joinid + + +def create_dataset_manifest(info_collection: soma.Collection, datasets: List[Dataset]) -> None: + """ + Write the Cell Census `census_datasets` dataframe + """ + logging.info("Creating dataset_manifest") + manifest_df = Dataset.to_dataframe(datasets) + manifest_df = manifest_df[CENSUS_DATASETS_COLUMNS + ["soma_joinid"]] + + # TODO: work-around for TileDB-SOMA#274. Remove when fixed. 
+ manifest_df = pandas_dataframe_strings_to_ascii_issue_247_workaround(manifest_df) + + # write to a SOMA dataframe + manifest_uri = uricat(info_collection.uri, CENSUS_DATASETS_NAME) + manifest = soma.DataFrame(manifest_uri, ctx=TileDB_Ctx()) + manifest.create(pa.Schema.from_pandas(manifest_df, preserve_index=False), index_column_names=["soma_joinid"]) + for batch in pa.Table.from_pandas(manifest_df, preserve_index=False).to_batches(): + manifest.write(batch) + info_collection.set(CENSUS_DATASETS_NAME, manifest, relative=True) diff --git a/cell_census_builder/experiment_builder.py b/cell_census_builder/experiment_builder.py new file mode 100644 index 000000000..945838f08 --- /dev/null +++ b/cell_census_builder/experiment_builder.py @@ -0,0 +1,566 @@ +import argparse +import concurrent.futures +import gc +import io +import logging +from enum import IntEnum +from typing import List, Optional, Sequence, Tuple, TypedDict, Union, overload + +import anndata +import numpy as np +import numpy.typing as npt +import pandas as pd +import pyarrow as pa +import tiledbsoma as soma +from scipy import sparse + +from .anndata import AnnDataFilterSpec, make_anndata_cell_filter, open_anndata +from .datasets import Dataset +from .globals import ( + CENSUS_OBS_TERM_COLUMNS, + CENSUS_VAR_TERM_COLUMNS, + CXG_OBS_TERM_COLUMNS, + DONOR_ID_IGNORE, + X_LAYERS, + TileDB_Ctx, +) +from .mp import create_process_pool_executor +from .source_assets import cat_file +from .summary_cell_counts import accumulate_summary_counts, init_summary_counts_accumulator +from .tissue_mapper import TissueMapper # type: ignore +from .util import ( + anndata_ordered_bool_issue_853_workaround, + array_chunker, + is_positive_integral, + pandas_dataframe_strings_to_ascii_issue_247_workaround, + uricat, +) + +# Contents: +# dataset_id +# dataset_soma_joinid - used as the presence row index +# eb_name +# data - presence COO data +# cols - presence COO col +# +# TODO: convert this to a dataclass or namedtuple. +# +PresenceResult = tuple[str, int, str, npt.NDArray[np.bool_], npt.NDArray[np.int64]] +PresenceResults = tuple[PresenceResult, ...] + +# UBERON tissue term mapper +tissue_mapper: TissueMapper = TissueMapper() + + +class ExperimentBuilder: + """ + Class to help build a parameterized SOMA experiment, where key parameters are: + * experiment "name" (eg, 'human'), must be unique in all experiments. + * an AnnData filter used to cherry pick data for the experiment + * methods to progressively build the experiment + + The creation and driving of these objects is done by the main loop. + """ + + name: str + anndata_cell_filter_spec: AnnDataFilterSpec + gene_feature_length_uris: List[str] + gene_feature_length: pd.DataFrame + build_state: "ExperimentBuilder.BuildState" + + # builder state sanity check, used to catch usage errors. 
+ + class BuildState(IntEnum): + Initialized = 0 + Created = 1 + AxisWritten = 2 + X_Created = 3 + X_JoinIdMetadataCreated = 4 + X_Written = 5 + X_Presence_Written = 6 + + def next(self) -> "ExperimentBuilder.BuildState": + return ExperimentBuilder.BuildState(self.value + 1) + + def __init__( + self, base_uri: str, name: str, anndata_cell_filter_spec: AnnDataFilterSpec, gene_feature_length_uris: List[str] + ): + self.name = name + self.anndata_cell_filter_spec = anndata_cell_filter_spec + self.gene_feature_length_uris = gene_feature_length_uris + self.se_uri = uricat(base_uri, name) + + # accumulated state + self.n_obs: int = 0 + self.n_unique_obs: int = 0 + self.n_var: int = 0 + self.n_datasets: int = 0 + self.n_donors: int = 0 # Caution: defined as (unique dataset_id, donor_id) tuples, *excluding* some values + self.var_df: pd.DataFrame = pd.DataFrame(columns=["feature_id", "feature_name"]) + self.dataset_obs_joinid_start: dict[str, int] + self.census_summary_cell_counts = init_summary_counts_accumulator() + self.presence: dict[int, tuple[npt.NDArray[np.bool_], npt.NDArray[np.int64]]] = {} + self.build_state = ExperimentBuilder.BuildState.Initialized + + self.load_assets() + + def load_assets(self) -> None: + """ + Load any external assets required to create the experiment. + """ + self.gene_feature_length = pd.concat( + pd.read_csv( + io.BytesIO(cat_file(uri)), + names=["feature_id", "feature_name", "gene_version", "feature_length"], + ) + .set_index("feature_id") + .drop(columns=["feature_name", "gene_version"]) + for uri in self.gene_feature_length_uris + ) + logging.info(f"Loaded gene lengths external reference for {self.name}, {len(self.gene_feature_length)} genes.") + + def is_finished(self) -> bool: + return self.build_state == ExperimentBuilder.BuildState.X_Presence_Written + + def create(self, data_collection: soma.Collection) -> None: + assert self.build_state == ExperimentBuilder.BuildState.Initialized + + """Make experiment at `uri` with a single Measurement and add to top-level collection.""" + logging.info(f"{self.name}: create experiment at {self.se_uri}") + + se = soma.Experiment(self.se_uri, ctx=TileDB_Ctx()) + if se.exists(): + logging.error("Census already exists - aborting") + raise Exception("Census already exists") + se.create() + data_collection.set(self.name, se, relative=True) + + # create `ms` + se.set("ms", soma.Collection(uricat(se.uri, "ms")).create(), relative=True) + + # create `obs` + obs_schema = pa.schema(list(CENSUS_OBS_TERM_COLUMNS.items())) + se.set( + "obs", + soma.DataFrame(uricat(se.uri, "obs")).create(obs_schema, index_column_names=["soma_joinid"]), + relative=True, + ) + + # make measurement and add to ms collection + measurement = soma.Measurement(uricat(se.ms.uri, "RNA")).create() + se.ms.set("RNA", measurement, relative=True) + + # make the `var` in the measurement + var_schema = pa.schema(list(CENSUS_VAR_TERM_COLUMNS.items())) + measurement.set( + "var", + soma.DataFrame(uricat(measurement.uri, "var")).create(var_schema, index_column_names=["soma_joinid"]), + relative=True, + ) + + # make the `X` collection (but not the actual layers) + measurement.set("X", soma.Collection(uricat(measurement.uri, "X")).create(), relative=True) + + # make the varp, to later contain the presence matrix + measurement.set("varp", soma.Collection(uricat(measurement.uri, "varp")).create(), relative=True) + + self.build_state = self.build_state.next() + return + + def accumulate_axes(self, dataset: Dataset, ad: anndata.AnnData, progress: Tuple[int, int] = (0, 0)) -> 
int: + """ + Write obs, accumate var. + + Returns: number of cells that make it past the experiment filter. + """ + progmsg = f"({progress[0]} of {progress[1]})" + logging.info(f"{self.name}: accumulate axis for dataset '{dataset.dataset_id}' {progmsg}") + assert self.build_state == ExperimentBuilder.BuildState.Created + + anndata_cell_filter = make_anndata_cell_filter(self.anndata_cell_filter_spec) + ad = anndata_cell_filter(ad, retain_X=False) + if ad.n_obs == 0: + logging.info(f"{self.name} - H5AD has no data after filtering, skipping {dataset.dataset_h5ad_path}") + return 0 + + # Narrow columns just to minimize memory footprint. Summary cell counting + # requires 'organism', do be careful not to delete that. + obs_df = ad.obs[list(CXG_OBS_TERM_COLUMNS) + ["organism"]].reset_index(drop=True).copy() + + # TODO XXX: Temporary work around pending resolution of TileDB-SOMA#274 + obs_df = pandas_dataframe_strings_to_ascii_issue_247_workaround(obs_df) + + obs_df["soma_joinid"] = range(self.n_obs, self.n_obs + len(obs_df)) + obs_df["dataset_id"] = dataset.dataset_id + + # high-level tissue mapping + add_tissue_mapping(obs_df, dataset.dataset_id) + + # Accumulate aggregation counts + self._accumulate_summary_cell_counts(obs_df) + + # drop columns we don't want to write + obs_df = obs_df[list(CENSUS_OBS_TERM_COLUMNS)] + obs_df = anndata_ordered_bool_issue_853_workaround(obs_df) + + se = soma.Experiment(self.se_uri, ctx=TileDB_Ctx()) + assert se.exists() + + pa_table = pa.Table.from_pandas( + obs_df, + preserve_index=False, + columns=list(CENSUS_OBS_TERM_COLUMNS), + ) + for pa_batch in pa_table.to_batches(): + se.obs.write(pa_batch) + + # Accmulate the union of all var ids/names (for raw and processed), to be later persisted. + # NOTE: assumes raw.var is None, OR has same index as var. Currently enforced in open_anndata(), + # but may need to evolve this logic if that assumption is not scalable. 
+ tv = ad.var.rename_axis("feature_id").reset_index()[["feature_id", "feature_name"]] + self.var_df = pd.concat([self.var_df, tv]).drop_duplicates() + + self.n_obs += len(obs_df) + self.n_unique_obs += (obs_df.is_primary_data == True).sum() # noqa: E712 + + donors = obs_df.donor_id.unique() + self.n_donors += len(donors) - np.isin(donors, DONOR_ID_IGNORE).sum() + + self.n_datasets += 1 + return len(obs_df) + + def commit_axis(self) -> None: + logging.info(f"{self.name}: commit axes") + se = soma.Experiment(self.se_uri) + assert se.exists() + assert self.build_state == ExperimentBuilder.BuildState.Created + + # if is possible there is nothing to write + if len(self.var_df) > 0: + # persist var + self.var_df["soma_joinid"] = range(len(self.var_df)) + self.var_df = self.var_df.join(self.gene_feature_length["feature_length"], on="feature_id") + self.var_df.feature_length.fillna(0, inplace=True) + + # TODO XXX: Temporary work around pending resolution of TileDB-SOMA#274 + self.var_df = pandas_dataframe_strings_to_ascii_issue_247_workaround(self.var_df) + + self.var_df = anndata_ordered_bool_issue_853_workaround(self.var_df) + + se.ms["RNA"].var.write( + pa.RecordBatch.from_pandas( + self.var_df, + preserve_index=False, + columns=list(CENSUS_VAR_TERM_COLUMNS), + ) + ) + + self.n_var = len(self.var_df) + self.build_state = self.build_state.next() + return + + def create_X_layers(self, datasets: List[Dataset]) -> None: + """ + Create layers in ms['RNA']/X + """ + logging.info(f"{self.name}: create X layers") + se = soma.Experiment(self.se_uri, ctx=TileDB_Ctx()) + assert se.exists() + assert se.ms["RNA"].exists() + assert self.n_obs >= 0 and self.n_var >= 0 + assert self.build_state == ExperimentBuilder.BuildState.AxisWritten + assert self.n_obs == 0 or self.n_datasets > 0 + + # SOMA does not currently support empty arrays, so special case this corner-case. + if self.n_obs > 0: + assert self.n_var > 0 + measurement = se.ms["RNA"] + for layer_name in X_LAYERS: + snda = soma.SparseNdArray(uricat(measurement.X.uri, layer_name), ctx=TileDB_Ctx()) + snda.create(pa.float32(), (self.n_obs, self.n_var)) + measurement.X.set(layer_name, snda, relative=True) + + presence_matrix = soma.SparseNdArray( + uricat(measurement.varp.uri, "dataset_presence_matrix"), ctx=TileDB_Ctx() + ) + max_dataset_joinid = max(d.soma_joinid for d in datasets) + presence_matrix.create(pa.bool_(), shape=(max_dataset_joinid + 1, self.n_var)) + measurement.varp.set("dataset_presence_matrix", presence_matrix, relative=True) + + self.build_state = self.build_state.next() + return + + def create_joinid_metadata(self) -> None: + logging.info(f"{self.name}: make joinid metadata") + assert self.build_state >= ExperimentBuilder.BuildState.AxisWritten + se = soma.Experiment(self.se_uri, ctx=TileDB_Ctx()) + assert se.exists() + + # Map of dataset_id -> starting soma_joinid for obs axis. This code _assumes_ + # that soma_joinid is contiguous (ie, no deletions in obs), which is + # known true for our use case (aggregating h5ads). 
+ self.dataset_obs_joinid_start = ( + se.obs.read_as_pandas_all(column_names=["dataset_id", "soma_joinid"]) + .groupby("dataset_id") + .min() + .soma_joinid.to_dict() + ) + + self.build_state = self.build_state.next() + + def commit_X(self, *, consolidate: bool = False) -> None: + logging.info(f"{self.name}: commit X") + assert self.build_state == ExperimentBuilder.BuildState.X_JoinIdMetadataCreated + self.build_state = self.build_state.next() + + def _accumulate_summary_cell_counts(self, obs_df: pd.DataFrame) -> None: + """ + Add summary counts to the census_summary_cell_counts dataframe + """ + assert "dataset_id" in obs_df + assert len(obs_df) > 0 + self.census_summary_cell_counts = accumulate_summary_counts(self.census_summary_cell_counts, obs_df) + + def commit_presence_matrix(self, datasets: List[Dataset]) -> None: + """ + Save presence matrix per Experiment + """ + assert self.build_state == ExperimentBuilder.BuildState.X_Written + + if len(self.presence) > 0: + max_dataset_joinid = max(d.soma_joinid for d in datasets) + + # A few sanity checks + assert len(self.presence) == self.n_datasets + assert max_dataset_joinid >= max(self.presence.keys()) # key is dataset joinid + + # LIL is fast way to create spmatrix + pm = sparse.lil_array((max_dataset_joinid + 1, self.n_var), dtype=bool) + for dataset_joinid, presence in self.presence.items(): + data, cols = presence + pm[dataset_joinid, cols] = data + + pm = pm.tocoo() + pm.eliminate_zeros() + assert pm.count_nonzero() == pm.nnz + assert pm.dtype == bool + se = soma.Experiment(self.se_uri, ctx=TileDB_Ctx()) + se.ms["RNA"].varp["dataset_presence_matrix"].write_sparse_tensor(pa.SparseCOOTensor.from_scipy(pm)) + + self.build_state = self.build_state.next() + + +def _accumulate_all_X_layers( + assets_path: str, + dataset: Dataset, + experiment_builders: List[ExperimentBuilder], + dataset_obs_joinid_starts: List[Union[None, int]], + ms_name: str, + progress: Tuple[int, int], +) -> PresenceResults: + """ + For this dataset, save all X layer information for each Experiment. This currently + includes: + X['raw'] - raw counts + + Also accumulates presence information per dataset. 
+
+    This is a helper function for ExperimentBuilder.accumulate_X
+    """
+    gc.collect()
+    logging.debug(f"Loading AnnData for dataset {dataset.dataset_id} ({progress[0]} of {progress[1]})")
+    unfiltered_ad = next(open_anndata(assets_path, [dataset]))[1]
+    assert unfiltered_ad.isbacked is False
+
+    presence = []
+    for eb, dataset_obs_joinid_start in zip(experiment_builders, dataset_obs_joinid_starts):
+        if dataset_obs_joinid_start is None:
+            # this dataset has no data for this experiment
+            continue
+
+        se = soma.Experiment(eb.se_uri, ctx=TileDB_Ctx())
+        assert se is not None
+        assert se.exists()
+
+        anndata_cell_filter = make_anndata_cell_filter(eb.anndata_cell_filter_spec)
+        ad = anndata_cell_filter(unfiltered_ad)
+        if ad.n_obs == 0:
+            continue
+
+        # follow CELLxGENE 3.0 schema conventions for raw/X aliasing when only raw counts exist
+        raw_X, raw_var = (ad.X, ad.var) if ad.raw is None else (ad.raw.X, ad.raw.var)
+
+        if not is_positive_integral(raw_X):
+            logging.error(f"{dataset.dataset_id} contains non-integer or negative valued data")
+
+        # save X['raw']
+        layer_name = "raw"
+        logging.info(
+            f"{eb.name}: saving X layer '{layer_name}' for dataset '{dataset.dataset_id}' "
+            f"({progress[0]} of {progress[1]})"
+        )
+        global_var_joinids = (
+            se.ms[ms_name].var.read_as_pandas_all(column_names=["feature_id", "soma_joinid"]).set_index("feature_id")
+        )
+        local_var_joinids = raw_var.join(global_var_joinids).soma_joinid.to_numpy()
+        assert (local_var_joinids >= 0).all(), f"Illegal join id, {dataset.dataset_id}"
+
+        for n, X in enumerate(array_chunker(raw_X), start=1):
+            logging.debug(f"{eb.name}/{layer_name}: X chunk {n} {dataset.dataset_id}")
+            # remap to match axes joinids
+            row = X.row.astype(np.int64) + dataset_obs_joinid_start
+            assert (row >= 0).all()
+            col = local_var_joinids[X.col]
+            assert (col >= 0).all()
+            X_remap = sparse.coo_array((X.data, (row, col)), shape=(eb.n_obs, eb.n_var))
+            se.ms[ms_name].X[layer_name].write_sparse_tensor(pa.SparseCOOTensor.from_scipy(X_remap))
+            gc.collect()
+
+        # Save presence information by dataset_id
+        assert dataset.soma_joinid >= 0  # i.e., it was assigned prior to this step
+        pres_data = raw_X.sum(axis=0) > 0
+        # sum() yields an np.matrix for scipy sparse matrices and a 1-D ndarray for
+        # dense arrays; normalize both cases to a 1-D boolean vector
+        pres_data = np.asarray(pres_data).ravel()
+        pres_cols = local_var_joinids[np.arange(ad.n_vars, dtype=np.int64)]
+
+        assert pres_data.dtype == bool
+        assert pres_cols.dtype == np.int64
+        assert pres_data.shape == (ad.n_vars,)
+        assert pres_data.shape == pres_cols.shape
+        assert ad.n_vars <= eb.n_var
+
+        presence.append(
+            (
+                dataset.dataset_id,
+                dataset.soma_joinid,
+                eb.name,
+                pres_data,
+                pres_cols,
+            )
+        )
+
+    gc.collect()
+    return tuple(presence)
+
+
+@overload
+def _accumulate_X(
+    assets_path: str, dataset: Dataset, experiment_builders: List["ExperimentBuilder"], progress: Tuple[int, int]
+) -> PresenceResults:
+    ...
+
+
+@overload
+def _accumulate_X(
+    assets_path: str,
+    dataset: Dataset,
+    experiment_builders: List["ExperimentBuilder"],
+    progress: Tuple[int, int],
+    executor: Optional[concurrent.futures.Executor],
+) -> concurrent.futures.Future[PresenceResults]:
+    ...
+
+
+def _accumulate_X(
+    assets_path: str,
+    dataset: Dataset,
+    experiment_builders: List["ExperimentBuilder"],
+    progress: Tuple[int, int],
+    executor: Optional[concurrent.futures.Executor] = None,
+) -> Union[concurrent.futures.Future[PresenceResults], PresenceResults]:
+    """
+    Save X layer data for a single AnnData, for all Experiments. Return a future if
+    executor is specified, otherwise immediately do the work.
+ """ + for eb in experiment_builders: + # sanity checks + assert eb.build_state == ExperimentBuilder.BuildState.X_JoinIdMetadataCreated + assert eb.dataset_obs_joinid_start is not None + + dataset_obs_joinid_starts = [ + eb.dataset_obs_joinid_start.get(dataset.dataset_id, None) for eb in experiment_builders + ] + if executor is not None: + return executor.submit( + _accumulate_all_X_layers, + assets_path, + dataset, + experiment_builders, + dataset_obs_joinid_starts, + "RNA", + progress, + ) + else: + return _accumulate_all_X_layers( + assets_path, dataset, experiment_builders, dataset_obs_joinid_starts, "RNA", progress + ) + + +def populate_X_layers( + assets_path: str, datasets: List[Dataset], experiment_builders: List[ExperimentBuilder], args: argparse.Namespace +) -> None: + """ + Do all X layer processing for all Experiments. + """ + + # populate X layers + presence: List[PresenceResult] = [] + if args.multi_process: + with create_process_pool_executor(args) as pe: + + futures = { + _accumulate_X( + assets_path, + dataset, + experiment_builders, + progress=(n, len(datasets)), + executor=pe, + ): dataset + for n, dataset in enumerate(datasets, start=1) + } + + for n, f in enumerate(concurrent.futures.as_completed(futures), start=1): + # propagate exceptions - not expecting any other return values + presence += f.result() + logging.info(f"pass 2, {futures[f].dataset_id} ({n} of {len(futures)}) complete.") + + else: + for n, dataset in enumerate(datasets, start=1): + presence += _accumulate_X(assets_path, dataset, experiment_builders, progress=(n, len(datasets))) + + eb_by_name = {e.name: e for e in experiment_builders} + for _, dataset_soma_joinid, eb_name, pres_data, pres_col in presence: + eb_by_name[eb_name].presence[dataset_soma_joinid] = (pres_data, pres_col) + + +class SummaryStats(TypedDict): + total_cell_count: int + unique_cell_count: int + number_donors: dict[str, int] + + +def get_summary_stats(experiment_builders: Sequence[ExperimentBuilder]) -> SummaryStats: + return { + "total_cell_count": sum(e.n_obs for e in experiment_builders), + "unique_cell_count": sum(e.n_unique_obs for e in experiment_builders), + "number_donors": {e.name: e.n_donors for e in experiment_builders}, + } + + +def add_tissue_mapping(obs_df: pd.DataFrame, dataset_id: str) -> None: + """Inplace addition of tissue_general-related columns""" + + tissue_ids = obs_df.tissue_ontology_term_id.unique() + + # Map specific ID -> general ID + tissue_general_id_map = {id: tissue_mapper.get_high_level_tissue(id) for id in tissue_ids} + if not all(tissue_general_id_map.values()): + logging.error(f"{dataset_id} contains tissue types which could not be generalized.") + obs_df["tissue_general_ontology_term_id"] = obs_df.tissue_ontology_term_id.map(tissue_general_id_map) + + # Assign general label + tissue_general_label_map = { + id: tissue_mapper.get_label_from_writable_id(id) for id in tissue_general_id_map.values() + } + obs_df["tissue_general"] = obs_df.tissue_general_ontology_term_id.map(tissue_general_label_map) diff --git a/cell_census_builder/globals.py b/cell_census_builder/globals.py new file mode 100644 index 000000000..89964d8e7 --- /dev/null +++ b/cell_census_builder/globals.py @@ -0,0 +1,126 @@ +import pyarrow as pa +import tiledb + +CENSUS_SCHEMA_VERSION = "0.0.1" + +CXG_SCHEMA_VERSION = "3.0.0" # version we write to the census +CXG_SCHEMA_VERSION_IMPORT = [CXG_SCHEMA_VERSION] # versions we can ingest + +# Columns expected in the census_datasets dataframe +CENSUS_DATASETS_COLUMNS = [ + "collection_id", + 
"collection_name", + "collection_doi", + "dataset_id", + "dataset_title", + "dataset_h5ad_path", + "dataset_total_cell_count", +] +CENSUS_DATASETS_NAME = "datasets" # object name + +CENSUS_SUMMARY_CELL_COUNTS_COLUMNS = { + "organism": pa.string(), + "category": pa.string(), + "label": pa.string(), + "ontology_term_id": pa.string(), + "total_cell_count": pa.int64(), + "unique_cell_count": pa.int64(), +} +CENSUS_SUMMARY_CELL_COUNTS_NAME = "summary_cell_counts" # object name + +CENSUS_SUMMARY_NAME = "summary" + +# CXG schema columns we preserve in our census, and the Arrow type to encode as. Schema: +# https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.0.0/schema.md +# +# NOTE: a few additional columns are added (they are not defined in the CXG schema), +# eg., dataset_id, tissue_general, etc. +CXG_OBS_TERM_COLUMNS = { + "assay": pa.large_string(), + "assay_ontology_term_id": pa.large_string(), + "cell_type": pa.large_string(), + "cell_type_ontology_term_id": pa.large_string(), + "development_stage": pa.large_string(), + "development_stage_ontology_term_id": pa.large_string(), + "disease": pa.large_string(), + "disease_ontology_term_id": pa.large_string(), + "donor_id": pa.large_string(), + "is_primary_data": pa.bool_(), + "self_reported_ethnicity": pa.large_string(), + "self_reported_ethnicity_ontology_term_id": pa.large_string(), + "sex": pa.large_string(), + "sex_ontology_term_id": pa.large_string(), + "suspension_type": pa.large_string(), + "tissue": pa.large_string(), + "tissue_ontology_term_id": pa.large_string(), +} +CENSUS_OBS_TERM_COLUMNS = { + "soma_joinid": pa.int64(), + "dataset_id": pa.large_string(), + **CXG_OBS_TERM_COLUMNS, + "tissue_general": pa.large_string(), + "tissue_general_ontology_term_id": pa.large_string(), +} + +CENSUS_VAR_TERM_COLUMNS = { + "soma_joinid": pa.int64(), + "feature_id": pa.large_string(), + "feature_name": pa.large_string(), + "feature_length": pa.int64(), +} + +X_LAYERS = [ + "raw", +] + +# list of EFO terms that correspond to RNA seq modality/measurement +RNA_SEQ = [ + "EFO:0008720", # DroNc-seq + "EFO:0008722", # Drop-seq + "EFO:0008780", # inDrop + "EFO:0008913", # single-cell RNA sequencing + "EFO:0008919", # Seq-Well + "EFO:0008930", # Smart-seq + "EFO:0008931", # Smart-seq2 + "EFO:0008953", # STRT-seq + "EFO:0008995", # 10x technology + "EFO:0009899", # 10x 3' v2 + "EFO:0009900", # 10x 5' v2 + "EFO:0009901", # 10x 3' v1 + "EFO:0009922", # 10x 3' v3 + "EFO:0010010", # CEL-seq2 + "EFO:0010183", # single cell library construction + "EFO:0010550", # sci-RNA-seq + "EFO:0011025", # 10x 5' v1 + "EFO:0030002", # microwell-seq + "EFO:0030003", # 10x 3' transcription profiling + "EFO:0030004", # 10x 5' transcription profiling + "EFO:0030019", # Seq-Well S3 + "EFO:0700003", # BD Rhapsody Whole Transcriptome Analysis + "EFO:0700004", # BD Rhapsody Targeted mRNA +] + +DONOR_ID_IGNORE = ["pooled", "unknown"] + +# Feature_reference values which are ignored (not considered) for +# multi-organism filtering. 
+SARS_COV_2 = "NCBITaxon:2697049" +ERCC_SPIKE_INS = "NCBITaxon:32630" +FEATURE_REFERENCE_IGNORE = {SARS_COV_2, ERCC_SPIKE_INS} + + +""" +Singletons used throughout the package +""" + +# Global TileDB context +_TileDB_Ctx: tiledb.Ctx = None + + +def TileDB_Ctx() -> tiledb.Ctx: + return _TileDB_Ctx + + +def set_tiledb_ctx(ctx: tiledb.Ctx) -> None: + global _TileDB_Ctx + _TileDB_Ctx = ctx diff --git a/cell_census_builder/manifest.py b/cell_census_builder/manifest.py new file mode 100644 index 000000000..3fdef61d0 --- /dev/null +++ b/cell_census_builder/manifest.py @@ -0,0 +1,147 @@ +import concurrent.futures +import csv +import io +import logging +import os.path +from typing import List, Optional, Union + +from .datasets import Dataset +from .globals import CXG_SCHEMA_VERSION_IMPORT +from .util import fetch_json + +CXG_BASE_URI = "https://api.cellxgene.cziscience.com/" + + +def parse_manifest_file(manifest_fp: io.TextIOBase) -> list[Dataset]: + """ + return manifest as list of tuples, (dataset_id, URI/path), read from the text stream + """ + # skip comments and strip leading/trailing white space + skip_comments = csv.reader(row for row in manifest_fp if not row.startswith("#")) + stripped = [[r.strip() for r in row] for row in skip_comments] + return [Dataset(dataset_id=r[0], corpora_asset_h5ad_uri=r[1]) for r in stripped] + + +def dedup_datasets(datasets: List[Dataset]) -> List[Dataset]: + ds = {d.dataset_id: d for d in datasets} + if len(ds) != len(datasets): + logging.warning("Dataset manifest contained DUPLICATES, which will be ignored.") + return list(ds.values()) + return datasets + + +def load_manifest_from_fp(manifest_fp: io.TextIOBase) -> list[Dataset]: + logging.info("Loading manifest from file") + all_datasets = parse_manifest_file(manifest_fp) + datasets = [ + d + for d in all_datasets + if d.corpora_asset_h5ad_uri.endswith(".h5ad") and os.path.exists(d.corpora_asset_h5ad_uri) + ] + if len(datasets) != len(all_datasets): + logging.warning("Manifest contained records which are not H5AD files or which are not accessible - ignoring") + return datasets + + +def null_to_empty_str(val: Union[None, str]) -> str: + if val is None: + return "" + return val + + +def load_manifest_from_CxG() -> list[Dataset]: + logging.info("Loading manifest from CELLxGENE data portal...") + + # Load all collections and extract dataset_id + collections = fetch_json(f"{CXG_BASE_URI}curation/v1/collections") + assert isinstance(collections, list), "Unexpected REST API response, /curation/v1/collections" + datasets = { + dataset["id"]: { + "collection_id": collection["id"], + "collection_name": null_to_empty_str(collection["name"]), + "collection_doi": null_to_empty_str(collection["doi"]), + "dataset_title": dataset.get("title", ""), # title is optional in schema + "dataset_id": dataset["id"], + } + for collection in collections + for dataset in collection["datasets"] + if dataset["tombstone"] is False # ignore anything that has been deleted + } + logging.info(f"Found {len(datasets)} datasets, in {len(collections)} collections") + + # load per-dataset schema version + with concurrent.futures.ThreadPoolExecutor(max_workers=8) as tp: + dataset_metadata = tp.map( + lambda d: fetch_json( + f"{CXG_BASE_URI}curation/v1/collections/{d['collection_id']}/datasets/{d['dataset_id']}" + ), + datasets.values(), + ) + for d in dataset_metadata: + assert ( + isinstance(d, dict) and "id" in d + ), "Unexpected REST API response, /curation/v1/collections/.../datasets/..." 
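+            # fold the per-dataset metadata (schema_version, canonical title) into
+            # the manifest record assembled above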
+ datasets[d["id"]].update( + { + "schema_version": d["schema_version"], + "dataset_title": null_to_empty_str(d["title"]), + } + ) + + # Remove any datasets that don't match our target schema version + obsolete_dataset_ids = [id for id in datasets if datasets[id]["schema_version"] not in CXG_SCHEMA_VERSION_IMPORT] + if len(obsolete_dataset_ids) > 0: + logging.warning(f"Dropping {len(obsolete_dataset_ids)} datasets due to unsupported schema version") + for id in obsolete_dataset_ids: + logging.info(f"Dropping dataset_id {id} due to schema version.") + datasets.pop(id) + + # Grab the asset URI for each dataset + with concurrent.futures.ThreadPoolExecutor(max_workers=8) as tp: + dataset_assets = tp.map( + lambda d: ( + d["dataset_id"], + fetch_json( + f"{CXG_BASE_URI}curation/v1/collections/{d['collection_id']}/datasets/{d['dataset_id']}/assets" + ), + ), + datasets.values(), + ) + no_asset_found = [] + for dataset_id, assets in dataset_assets: + assert isinstance( + assets, list + ), "Unexpected REST API response, /curation/v1/collections/.../datasets/.../assets" + assets_h5ad = [a for a in assets if a["filetype"] == "H5AD"] + if len(assets_h5ad) == 0: + logging.error(f"Unable to find H5AD asset for dataset id {dataset_id} - ignoring this dataset") + no_asset_found.append(dataset_id) + else: + asset = assets_h5ad[0] + datasets[dataset_id].update( + { + "corpora_asset_h5ad_uri": asset["presigned_url"], + "asset_h5ad_filesize": asset["filesize"], + } + ) + + # drop any datasets where we could not find an asset + for id in no_asset_found: + datasets.pop(id, None) + + return [Dataset(**d) for d in datasets.values()] + + +def load_manifest(manifest_fp: Optional[io.TextIOBase] = None) -> list[Dataset]: + """ + Load dataset manifest from the file pointer if provided, else bootstrap + the load rom the CELLxGENE REST API. 
+ """ + if manifest_fp is not None: + datasets = load_manifest_from_fp(manifest_fp) + else: + datasets = load_manifest_from_CxG() + + logging.info(f"Loaded {len(datasets)} datasets.") + datasets = dedup_datasets(datasets) + return datasets diff --git a/cell_census_builder/mp.py b/cell_census_builder/mp.py new file mode 100644 index 000000000..dc59deafd --- /dev/null +++ b/cell_census_builder/mp.py @@ -0,0 +1,50 @@ +import argparse +import concurrent.futures +import logging +import os +from typing import Optional, cast + +import tiledbsoma as soma + +from .globals import set_tiledb_ctx + +if soma.get_storage_engine() == "tiledb": + import tiledb + + +def cpu_count() -> int: + """Sign, os.cpu_count() returns None if "undetermined" number of CPUs""" + cpu_count = os.cpu_count() + if os.cpu_count() is None: + return 1 + return cast(int, cpu_count) + + +def process_initializer(verbose: int = 0) -> None: + level = logging.DEBUG if verbose > 1 else logging.INFO if verbose == 1 else logging.WARNING + logging.basicConfig( + format="%(asctime)s %(process)-7s %(levelname)-8s %(message)s", + level=level, + datefmt="%Y-%m-%d %H:%M:%S", + ) + logging.captureWarnings(True) + + if soma.get_storage_engine() == "tiledb": + set_tiledb_ctx( + tiledb.Ctx( + { + "py.init_buffer_bytes": 512 * 1024**2, + "py.deduplicate": "true", + } + ) + ) + + +def create_process_pool_executor( + args: argparse.Namespace, max_workers: Optional[int] = None +) -> concurrent.futures.ProcessPoolExecutor: + return concurrent.futures.ProcessPoolExecutor( + max_workers=args.max_workers if max_workers is None else max_workers, + initializer=process_initializer, + initargs=(args.verbose,), + ) diff --git a/cell_census_builder/requirements.txt b/cell_census_builder/requirements.txt new file mode 100644 index 000000000..22da57df2 --- /dev/null +++ b/cell_census_builder/requirements.txt @@ -0,0 +1,16 @@ +pyarrow +pandas +anndata +numpy +tiledb +# NOTE: Until tiledbsoma is available on PyPi, you will need to build this dependency +# from source, per ./notebooks/README.md. 
+# tiledbsoma>=0.5.0
+scipy
+fsspec
+s3fs
+requests
+aiohttp
+Cython # required by owlready2
+wheel # required by owlready2
+owlready2
diff --git a/cell_census_builder/source_assets.py b/cell_census_builder/source_assets.py
new file mode 100644
index 000000000..244f4f8a8
--- /dev/null
+++ b/cell_census_builder/source_assets.py
@@ -0,0 +1,58 @@
+import argparse
+import logging
+import os
+import urllib.parse
+from typing import List, Tuple, cast
+
+import aiohttp
+import fsspec
+
+from .datasets import Dataset
+from .mp import cpu_count, create_process_pool_executor
+
+
+def stage_source_assets(datasets: List[Dataset], args: argparse.Namespace, assets_dir: str) -> None:
+    logging.info(f"Starting asset staging to {assets_dir}")
+    assert os.path.isdir(assets_dir)
+
+    # Fetch datasets largest first, to minimize overall download time
+    datasets = sorted(datasets, key=lambda d: d.asset_h5ad_filesize, reverse=True)
+
+    N = len(datasets)
+    # clamp the worker count to the range [8, 64] - the previous expression,
+    # max(min(8, cpu_count()), 64), always evaluated to 64
+    n_workers = min(max(8, cpu_count()), 64)
+    with create_process_pool_executor(args, n_workers) as pe:
+        paths = [
+            path
+            for path in pe.map(copy_file, ((n, dataset, assets_dir, N) for n, dataset in enumerate(datasets, start=1)))
+        ]
+
+    for i in range(len(datasets)):
+        datasets[i].dataset_h5ad_path = paths[i]
+
+
+def _copy_file(n: int, dataset: Dataset, asset_dir: str, N: int) -> str:
+    HTTP_GET_TIMEOUT_SEC = 2 * 60 * 60  # just a very big timeout
+    protocol = urllib.parse.urlparse(dataset.corpora_asset_h5ad_uri).scheme
+    fs = fsspec.filesystem(
+        protocol,
+        client_kwargs={"timeout": aiohttp.ClientTimeout(total=HTTP_GET_TIMEOUT_SEC, connect=None)},
+    )
+    dataset_file_name = f"{dataset.dataset_id}.h5ad"
+    dataset_path = f"{asset_dir}/{dataset_file_name}"
+
+    logging.info(f"Staging {dataset.dataset_id} ({n} of {N}) to {dataset_path}")
+    fs.get_file(dataset.corpora_asset_h5ad_uri, dataset_path)
+    logging.info(f"Staging {dataset.dataset_id} ({n} of {N}) complete")
+    return dataset_file_name
+
+
+def copy_file(args: Tuple[int, Dataset, str, int]) -> str:
+    return _copy_file(*args)
+
+
+def cat_file(url: str) -> bytes:
+    with fsspec.open(url, compression="infer") as f:
+        content = cast(bytes, f.read())  # fsspec has no typing, yet
+
+    return content
diff --git a/cell_census_builder/summary_cell_counts.py b/cell_census_builder/summary_cell_counts.py
new file mode 100644
index 000000000..f33e1b51a
--- /dev/null
+++ b/cell_census_builder/summary_cell_counts.py
@@ -0,0 +1,128 @@
+import logging
+from typing import Sequence
+
+import numpy as np
+import pandas as pd
+import pyarrow as pa
+import tiledbsoma as soma
+
+from .globals import CENSUS_SUMMARY_CELL_COUNTS_COLUMNS, CENSUS_SUMMARY_CELL_COUNTS_NAME, TileDB_Ctx
+from .util import (
+    anndata_ordered_bool_issue_853_workaround,
+    pandas_dataframe_strings_to_ascii_issue_247_workaround,
+    uricat,
+)
+
+
+def create_census_summary_cell_counts(
+    info_collection: soma.Collection, per_experiment_summary: Sequence[pd.DataFrame]
+) -> None:
+    """
+    Save per-category counts as the census_summary_cell_counts SOMA dataframe
+    """
+    logging.info("Creating census_summary_cell_counts")
+    df = (
+        pd.concat(per_experiment_summary, ignore_index=True)
+        .drop(columns=["dataset_id"])
+        .groupby(by=["organism", "category", "ontology_term_id"], as_index=False, observed=True)
+        .agg({"unique_cell_count": "sum", "total_cell_count": "sum", "label": "first"})
+    )
+    df["soma_joinid"] = df.index.astype(np.int64)
+
+    # TODO: work-around for TileDB-SOMA#274. Remove when fixed.
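+    # (these coerce string columns to plain ASCII, and normalize numpy.bool_
+    # `ordered` flags on categorical columns to Python bool; see util.py)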
+ df = pandas_dataframe_strings_to_ascii_issue_247_workaround(df) + df = anndata_ordered_bool_issue_853_workaround(df) + + # write to a SOMA dataframe + summary_counts_uri = uricat(info_collection.uri, CENSUS_SUMMARY_CELL_COUNTS_NAME) + summary_counts = soma.DataFrame(summary_counts_uri, ctx=TileDB_Ctx()) + summary_counts.create(pa.Schema.from_pandas(df, preserve_index=False), index_column_names=["soma_joinid"]) + for batch in pa.Table.from_pandas(df, preserve_index=False).to_batches(): + summary_counts.write(batch) + info_collection.set(CENSUS_SUMMARY_CELL_COUNTS_NAME, summary_counts, relative=True) + + +def init_summary_counts_accumulator() -> pd.DataFrame: + return pd.DataFrame( + data={ + "dataset_id": pd.Series([], dtype=str), + **{ + name: pd.Series([], dtype=arrow_type.to_pandas_dtype()) + for name, arrow_type in CENSUS_SUMMARY_CELL_COUNTS_COLUMNS.items() + }, + } + ) + + +def accumulate_summary_counts(current: pd.DataFrame, obs_df: pd.DataFrame) -> pd.DataFrame: + """ + Add summary counts to the census_summary_cell_counts dataframe + """ + assert "dataset_id" in obs_df + assert len(obs_df) > 0 + + CATEGORIES = [ + # term_id, label + ("cell_type_ontology_term_id", "cell_type"), + ("assay_ontology_term_id", "assay"), + ("tissue_ontology_term_id", "tissue"), + ("disease_ontology_term_id", "disease"), + ("self_reported_ethnicity_ontology_term_id", "self_reported_ethnicity"), + ("sex_ontology_term_id", "sex"), + ("tissue_general_ontology_term_id", "tissue_general"), + (None, "suspension_type"), + ] + + dfs = [] + for term_id, term_label in CATEGORIES: + cats = [] + columns = {} + assert term_id is not None or term_label is not None + if term_id is not None: + cats.append(term_id) + columns.update({term_id: "ontology_term_id"}) + if term_label is not None: + cats.append(term_label) + columns.update({term_label: "label"}) + assert len(cats) > 0 and len(columns) > 0 # i.e., one or both of term or label are specified + + df = obs_df[["dataset_id", "organism", *cats, "is_primary_data"]].rename(columns=columns) + if "label" not in df: + df["label"] = "na" + if "ontology_term_id" not in df: + df["ontology_term_id"] = "na" + + counts = ( + df.value_counts() + .to_frame(name="count") + .reset_index(level="is_primary_data") + .pivot_table( + values="count", + columns="is_primary_data", + index=["organism", "ontology_term_id", "label"], + fill_value=0, + ) + ) + if True not in counts: + counts[True] = 0 + if False not in counts: + counts[False] = 0 + + counts["category"] = term_label if term_label is not None else term_id + counts["unique_cell_count"] = counts[True] + counts["total_cell_count"] = counts[True] + counts[False] + counts = counts.drop(columns=[True, False]).reset_index() + dfs.append(counts) + + all = pd.DataFrame( + data={ + "dataset_id": [obs_df.iloc[0].dataset_id], + "organism": [obs_df.iloc[0].organism], + "ontology_term_id": ["na"], + "label": ["na"], + "category": ["all"], + "unique_cell_count": [dfs[0].unique_cell_count.sum()], + "total_cell_count": [dfs[0].total_cell_count.sum()], + } + ) + return pd.concat([current, all, *dfs], ignore_index=True) diff --git a/cell_census_builder/tissue_mapper.py b/cell_census_builder/tissue_mapper.py new file mode 100644 index 000000000..073e44a6e --- /dev/null +++ b/cell_census_builder/tissue_mapper.py @@ -0,0 +1,260 @@ +# type: ignore +# isort:skip_file +# flake8: noqa +""" +NOTE: This is a (literal) copy of 
+https://github.com/chanzuckerberg/single-cell-data-portal/blob/9b94ccb0a2e0a8f6182b213aa4852c491f6f6aff/backend/wmg/data/tissue_mapper.py + +This code should not be duplicated, but rather repackaged to be an importable sub-dependency. + +This code contains several places that do not pass the lint/static analysis CI for this pipeline, so the analysis is disabled in this prologue. +""" + +import owlready2 +from typing import List + + +class TissueMapper: + + # Name of anatomical structure, used to determine the set of ancestors for a given + # entity that we"re interested in. + ANATOMICAL_STRUCTURE_NAME = "UBERON_0000061" + + # List of high level tissues, ORDER MATTERS. If for a given tissue there are multiple high-level tissues associated + # then `self.get_high_level_tissue()` returns the one that appears first in th this list + HIGH_LEVEL_TISSUES = [ + "UBERON_0000178", # blood + "UBERON_0002048", # lung + "UBERON_0002106", # spleen + "UBERON_0002371", # bone marrow + "UBERON_0002107", # liver + "UBERON_0002113", # kidney + "UBERON_0000955", # brain + "UBERON_0002240", # spinal cord + "UBERON_0000310", # breast + "UBERON_0000948", # heart + "UBERON_0002097", # skin of body + "UBERON_0000970", # eye + "UBERON_0001264", # pancreas + "UBERON_0001043", # esophagus + "UBERON_0001155", # colon + "UBERON_0000059", # large intestine + "UBERON_0002108", # small intestine + "UBERON_0000160", # intestine + "UBERON_0000945", # stomach + "UBERON_0001836", # saliva + "UBERON_0001723", # tongue + "UBERON_0001013", # adipose tissue + "UBERON_0000473", # testis + "UBERON_0002367", # prostate gland + "UBERON_0000057", # urethra + "UBERON_0000056", # ureter + "UBERON_0003889", # fallopian tube + "UBERON_0000995", # uterus + "UBERON_0000992", # ovary + "UBERON_0002110", # gall bladder + "UBERON_0001255", # urinary bladder + "UBERON_0018707", # bladder organ + "UBERON_0000922", # embryo + "UBERON_0004023", # ganglionic eminence --> this a part of the embryo, remove in case generality is desired + "UBERON_0001987", # placenta + "UBERON_0007106", # chorionic villus + "UBERON_0002369", # adrenal gland + "UBERON_0002368", # endocrine gland + "UBERON_0002365", # exocrine gland + "UBERON_0000030", # lamina propria + "UBERON_0000029", # lymph node + "UBERON_0004536", # lymph vasculature + "UBERON_0001015", # musculature + "UBERON_0000004", # nose + "UBERON_0003688", # omentum + "UBERON_0000977", # pleura + "UBERON_0002370", # thymus + "UBERON_0002049", # vasculature + "UBERON_0009472", # axilla + "UBERON_0001087", # pleural fluid + "UBERON_0000344", # mucosa + "UBERON_0001434", # skeletal system + "UBERON_0002228", # rib + "UBERON_0003129", # skull + "UBERON_0004537", # blood vasculature + "UBERON_0002405", # immune system + "UBERON_0001009", # circulatory system + "UBERON_0001007", # digestive system + "UBERON_0001017", # central nervous system + "UBERON_0001008", # renal system + "UBERON_0000990", # reproductive system + "UBERON_0001004", # respiratory system + "UBERON_0000010", # peripheral nervous system + "UBERON_0001032", # sensory system + "UBERON_0002046", # thyroid gland + "UBERON_0004535", # cardiovascular system + "UBERON_0000949", # endocrine system + "UBERON_0002330", # exocrine system + "UBERON_0002390", # hematopoietic system + "UBERON_0000383", # musculature of body + "UBERON_0001465", # knee + "UBERON_0001016", # nervous system + "UBERON_0001348", # brown adipose tissue + "UBERON_0015143", # mesenteric fat pad + "UBERON_0000175", # pleural effusion + "UBERON_0001416", # skin of abdomen + 
"UBERON_0001868", # skin of chest + "UBERON_0001511", # skin of leg + "UBERON_0002190", # subcutaneous adipose tissue + "UBERON_0035328", # upper outer quadrant of breast + "UBERON_0000014", # zone of skin + ] + + # Terms to ignore when mapping + DENY_LIST = [ + "BFO_0000004", + "CARO_0000000", + "CARO_0030000", + "CARO_0000003", + "NCBITaxon_6072", + "Thing", + "UBERON_0000465", # material anatomical entity + "UBERON_0001062", # anatomical entity + ] + + def __init__(self, uberon_ontology: str = "http://purl.obolibrary.org/obo/uberon.owl"): + # TODO: use the pinned ontology at `single-cell-curation` + self._uberon = owlready2.get_ontology(uberon_ontology) + self._uberon.load() + self._cached_tissues = {} + self._cached_labels = {} + + def get_high_level_tissue(self, tissue_ontology_term_id: str) -> str: + """ + Returns the associated high-level tissue ontology term ID from any other ID + Edge cases: + - If multiple high-level tissues exists for a given tissue, returns the one with higher priority (the first + appearance in list self.HIGH_LEVEL_TISSUES. + - If no high-level tissue is found, returns the same as input. + - If the input tissue is not found in the ontology, return the same as input. + - This could happen with something like "UBERON:0002048 (cell culture)" + """ + + tissue_ontology_term_id = self.reformat_ontology_term_id(tissue_ontology_term_id, to_writable=False) + + if tissue_ontology_term_id in self._cached_tissues: + # If we have looked this up already + return self._cached_tissues[tissue_ontology_term_id] + + entity = self._get_entity_from_id(tissue_ontology_term_id) + + if not entity: + # If not found as an ontology ID return itself + result = self.reformat_ontology_term_id(tissue_ontology_term_id, to_writable=True) + self._cached_tissues[tissue_ontology_term_id] = result + return result + + # List ancestors for this entity, including itself. Ignore any ancestors that + # are not descendents of UBERON_0000061 (anatomical structure). 
+ ancestors = [entity.name] + branch_ancestors = [] + for is_a in entity.is_a: + branch_ancestors = self._list_ancestors(is_a, branch_ancestors) + + # Include this branch of ancestors is under anatomical structure + if self.ANATOMICAL_STRUCTURE_NAME in branch_ancestors: + ancestors.extend(branch_ancestors) + + # Check if there's at least one top-level entity in the list of ancestors + # for this entity + selected_tissue = tissue_ontology_term_id + for high_level_tissue in self.HIGH_LEVEL_TISSUES: + if high_level_tissue in ancestors: + selected_tissue = high_level_tissue + break + + result = self.reformat_ontology_term_id(selected_tissue, to_writable=True) + self._cached_tissues[tissue_ontology_term_id] = result + return result + + def get_label_from_writable_id(self, ontology_term_id: str): + """ + Returns the label from and ontology term id that is in writable form + Example: "UBERON:0002048" returns "lung" + Example: "UBERON_0002048" raises ValueError because the ID is not in writable form + """ + + if ontology_term_id in self._cached_labels: + return self._cached_labels[ontology_term_id] + + entity = self._get_entity_from_id(self.reformat_ontology_term_id(ontology_term_id, to_writable=False)) + if entity: + result = entity.label[0] + else: + result = ontology_term_id + + self._cached_labels[ontology_term_id] = result + return result + + @staticmethod + def reformat_ontology_term_id(ontology_term_id: str, to_writable: bool = True): + """ + Converts ontology term id string between two formats: + - `to_writable == True`: from "UBERON_0002048" to "UBERON:0002048" + - `to_writable == False`: from "UBERON:0002048" to "UBERON_0002048" + """ + + if to_writable: + if ontology_term_id.count("_") != 1: + raise ValueError(f"{ontology_term_id} is an invalid ontology term id, it must contain exactly one '_'") + return ontology_term_id.replace("_", ":") + else: + if ontology_term_id.count(":") != 1: + raise ValueError(f"{ontology_term_id} is an invalid ontology term id, it must contain exactly one ':'") + return ontology_term_id.replace(":", "_") + + def _list_ancestors(self, entity: owlready2.entity.ThingClass, ancestors: List[str] = []) -> List[str]: + """ + Recursive function that given an entity of an ontology, it traverses the ontology and returns + a list of all ancestors associated with the entity. + """ + + if self._is_restriction(entity): + # Entity is a restriction, check for part_of relationship + + prop = entity.property.name + if prop != "BFO_0000050": + # BFO_0000050 is "part of" + return ancestors + ancestors.append(entity.value.name.replace("obo.", "")) + + # Check for ancestors of restriction + self._list_ancestors(entity.value, ancestors) + return ancestors + + elif self._is_entity(entity) and not self._is_and_object(entity): + # Entity is a superclass, check for is_a relationships + + if entity.name in self.DENY_LIST: + return ancestors + ancestors.append(entity.name) + + # Check for ancestors of superclass + for super_entity in entity.is_a: + self._list_ancestors(super_entity, ancestors) + return ancestors + + def _get_entity_from_id(self, ontology_term_id: str) -> owlready2.entity.ThingClass: + """ + Given a readable ontology term id (e.g. 
"UBERON_0002048"), it returns the associated ontology entity + """ + # TODO: use the pinned ontology at `single-cell-curation` + return self._uberon.search_one(iri=f"http://purl.obolibrary.org/obo/{ontology_term_id}") + + @staticmethod + def _is_restriction(entity: owlready2.entity.ThingClass) -> bool: + return hasattr(entity, "value") + + @staticmethod + def _is_entity(entity: owlready2.entity.ThingClass) -> bool: + return hasattr(entity, "name") + + @staticmethod + def _is_and_object(entity: owlready2.entity.ThingClass) -> bool: + return hasattr(entity, "Classes") diff --git a/cell_census_builder/tools/aws/mount_instance_storage.sh b/cell_census_builder/tools/aws/mount_instance_storage.sh new file mode 100644 index 000000000..e9c444ffa --- /dev/null +++ b/cell_census_builder/tools/aws/mount_instance_storage.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash + +# This automates mounting all instance (ephemeral) storage onto a file +# system. If a single device is found, it creates an ext4 file system. +# If multiple are found, it creates a RAID0 group, and an ext4 file +# system on top of it. +# +# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/add-instance-store-volumes.html +# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/raid-config.html +# + +# exit immediately when a command fails +set -e +# treat unset variables as an error and exit immediately +set -u +# echo each line of the script to stdout so we can see what is happening +# to turn off echo do 'set +o xtrace' +set -o xtrace + + +DEVICE_PREFIX="nvme" +MOUNTPOINT="/mnt/scratch" +RAID_VOLUME="/dev/md0" +VOLUME_LABEL="scratch_volume" + + +# Must be run as privledged user +if [[ $(id -u) != 0 ]]; then + echo "ERROR: not root. You must run using sudo. Exiting with no action taken." + exit +fi + +# Test for a conflict on the mount point +if grep -qs ' ${MOUNTPOINT} ' /proc/mounts; then + echo "ERROR: ${MOUNTPOINT} aleady in use. Exiting with no action taken." + exit +fi + + +# Detect all blcok devices that are disks, and do not have +# partitions or other holder devices (eg, part of raid group, etc) +function detect_devices { + PY_CMD=' +import sys, json +device_prefix = sys.argv[1] +bdevs = [ + dev for dev in json.load(sys.stdin)["blockdevices"] + if dev["type"] == "disk" and "children" not in dev and dev["name"].startswith(device_prefix) +] +for d in bdevs: + name = d["name"] + print(f"/dev/{name}") +' + lsblk --json --output NAME,TYPE,MOUNTPOINT | python3 -c "${PY_CMD}" "$1" +} + +function create_volume { + devices_count=$(wc -w <<< $@) + if [[ ${devices_count} == 0 ]]; then + echo "No devices found, no volume created." + exit 1 + elif [[ ${devices_count} == 1 ]]; then + echo "Found single device, creating volume" + mkfs.ext4 -L ${VOLUME_LABEL} $@ + else + echo "Found ${devices_count} devices, creating RAID0 volume" + mdadm --create --verbose ${RAID_VOLUME} --level=0 --name=${VOLUME_LABEL} --raid-devices=${devices_count} $@ + mkfs.ext4 -L ${VOLUME_LABEL} ${RAID_VOLUME} + fi +} + +function mount_volume { + mkdir -p ${MOUNTPOINT} + mount LABEL=${VOLUME_LABEL} ${MOUNTPOINT} + chmod 777 ${MOUNTPOINT} +} + +create_volume $(detect_devices ${DEVICE_PREFIX}) +mount_volume +echo "Done. Mounted on ${MOUNTPOINT}." 
diff --git a/cell_census_builder/tools/aws/swapon_instance_storage.sh b/cell_census_builder/tools/aws/swapon_instance_storage.sh
new file mode 100644
index 000000000..bd9a086f0
--- /dev/null
+++ b/cell_census_builder/tools/aws/swapon_instance_storage.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+
+# This automates adding all instance (ephemeral) storage as swap
+#
+# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-store-swap-volumes.html
+
+# exit immediately when a command fails
+set -e
+# treat unset variables as an error and exit immediately
+set -u
+# echo each line of the script to stdout so we can see what is happening
+# to turn off echo do 'set +o xtrace'
+set -o xtrace
+
+DEVICE_PREFIX="nvme"
+
+# Must be run as privileged user
+if [[ $(id -u) != 0 ]]; then
+    echo "ERROR: not root. You must run using sudo. Exiting with no action taken."
+    exit 1
+fi
+
+# Detect all block devices that are disks, and do not have
+# partitions or other holder devices (eg, part of raid group, etc)
+function detect_devices {
+    PY_CMD='
+import sys, json
+device_prefix = sys.argv[1]
+bdevs = [
+    dev for dev in json.load(sys.stdin)["blockdevices"]
+    if dev["type"] == "disk" and "children" not in dev and dev["name"].startswith(device_prefix)
+]
+for d in bdevs:
+    name = d["name"]
+    print(f"/dev/{name}")
+'
+    lsblk --json --output NAME,TYPE,MOUNTPOINT | python3 -c "${PY_CMD}" "$1"
+}
+
+for bdev in $(detect_devices ${DEVICE_PREFIX}); do
+    echo "Adding ${bdev}"
+    mkswap ${bdev}
+    swapon -v ${bdev}
+done
+
+echo "Done, swapping on devices:"
+swapon -s
diff --git a/cell_census_builder/util.py b/cell_census_builder/util.py
new file mode 100644
index 000000000..678b2a0e8
--- /dev/null
+++ b/cell_census_builder/util.py
@@ -0,0 +1,128 @@
+import urllib.parse
+from typing import Any, Iterator, Union
+from warnings import warn
+
+import numpy as np
+import numpy.typing as npt
+import pandas as pd
+import requests
+from scipy import sparse
+
+
+def array_chunker(arr: Union[npt.NDArray[Any], sparse.spmatrix]) -> Iterator[sparse.coo_matrix]:
+    """
+    Yield the array as a sequence of chunks, each a coo_matrix.
+    """
+    nnz_chunk_size = 256 * 1024**2  # goal (~2.4GiB for a 32-bit COO)
+
+    if isinstance(arr, sparse.csr_matrix) or isinstance(arr, sparse.csr_array):
+        avg_nnz_per_row = arr.nnz // arr.shape[0]
+        row_chunk_size = max(1, round(nnz_chunk_size / avg_nnz_per_row))
+        for row_idx in range(0, arr.shape[0], row_chunk_size):
+            slc = arr[row_idx : row_idx + row_chunk_size, :].tocoo()
+            slc.resize(arr.shape)
+            slc.row += row_idx
+            yield slc
+        return
+
+    if isinstance(arr, sparse.csc_matrix) or isinstance(arr, sparse.csc_array):
+        avg_nnz_per_col = arr.nnz // arr.shape[1]
+        col_chunk_size = max(1, round(nnz_chunk_size / avg_nnz_per_col))
+        for col_idx in range(0, arr.shape[1], col_chunk_size):
+            slc = arr[:, col_idx : col_idx + col_chunk_size].tocoo()
+            slc.resize(arr.shape)
+            slc.col += col_idx
+            yield slc
+        return
+
+    if isinstance(arr, np.ndarray):
+        row_chunk_size = max(1, nnz_chunk_size // arr.shape[1])
+        for row_idx in range(0, arr.shape[0], row_chunk_size):
+            slc = sparse.coo_matrix(arr[row_idx : row_idx + row_chunk_size, :])
+            slc.resize(arr.shape)
+            slc.row += row_idx
+            yield slc
+        return
+
+    raise NotImplementedError("array_chunker: unsupported array type")
+
+
+def uricat(container_uri: str, *paths: str) -> str:
+    """
+    Concat one or more paths, separated with '/'
+
+    Similar to urllib.parse.urljoin except it takes an iterator, and
+    assumes the container_uri is a 'directory'/container, ie, ends in '/'.
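+
+    e.g., uricat("/tmp/census/", "soma", "obs") -> "/tmp/census/soma/obs"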
+ """ + + uri = container_uri + for p in paths: + uri = uri if uri.endswith("/") else uri + "/" + uri = urllib.parse.urljoin(uri, p) + return uri + + +def fetch_json(url: str) -> object: + response = requests.get(url) + response.raise_for_status() + return response.json() + + +def is_positive_integral(X: Union[npt.NDArray[np.floating[Any]], sparse.spmatrix]) -> bool: + """ + Return true if the matrix/array contains only positive integral values, + False otherwise. + """ + data = X if isinstance(X, np.ndarray) else X.data + + if np.signbit(data).any(): + return False + elif np.any(~np.equal(np.mod(data, 1), 0)): + return False + else: + return True + + +def pandas_dataframe_strings_to_ascii_issue_247_workaround(df: pd.DataFrame) -> pd.DataFrame: + """ + TileDB-SOMA _temporarily_ supports only ASCII in strings. + This code will convert all strings in a dataframe to ascii. + Remove this code when TileDB-SOMA#274 is resolved. + """ + import unicodedata + + warn("Converting dataframe strings to ASCII as temporary work-around for TileDB-SOMA#274.") + for k in df: + if df[k].dtype == object: + df[k] = df[k].map(lambda val: unicodedata.normalize("NFKD", val).encode("ascii", "ignore").decode()) + + return df + + +def anndata_ordered_bool_issue_853_workaround(df: pd.DataFrame) -> pd.DataFrame: + # """ + # TileDB-SOMA does not support creating dataframe with categorical / dictionary + # column types. + # """ + # copied = False + # for k in df.keys(): + # if pd.api.types.is_categorical_dtype(df[k]): + # if not copied: + # df = df.copy() + # copied = True + + # df[k] = df[k].astype(df[k].cat.categories.dtype) + + # AnnData has a bug (https://github.com/scverse/anndata/issues/853) which will + # cause Pandas CategoricalDtype `ordered` to be a numpy.bool_, rather than a bool. + # This causes Arrow to blow up. 
+ copied = False + for k in df.keys(): + if pd.api.types.is_categorical_dtype(df[k]) and type(df[k].cat.ordered) == np.bool_: + if not copied: + df = df.copy() + copied = True + + df[k] = df[k].cat.set_categories(df[k].cat.categories, ordered=bool(df[k].cat.ordered)) + + return df diff --git a/cell_census_builder/validate.py b/cell_census_builder/validate.py new file mode 100644 index 000000000..d1d18ad0a --- /dev/null +++ b/cell_census_builder/validate.py @@ -0,0 +1,374 @@ +import argparse +import concurrent.futures +import dataclasses +import logging +import os.path +import pathlib +from dataclasses import dataclass +from datetime import datetime +from typing import Any, Dict, List, Tuple, Union, cast + +import numpy as np +import numpy.typing as npt +import pyarrow as pa +import tiledbsoma as soma +from scipy import sparse + +from .anndata import make_anndata_cell_filter, open_anndata +from .datasets import Dataset +from .experiment_builder import ExperimentBuilder +from .globals import ( + CENSUS_DATASETS_COLUMNS, + CENSUS_DATASETS_NAME, + CENSUS_OBS_TERM_COLUMNS, + CENSUS_SCHEMA_VERSION, + CENSUS_SUMMARY_CELL_COUNTS_COLUMNS, + CENSUS_SUMMARY_CELL_COUNTS_NAME, + CENSUS_SUMMARY_NAME, + CENSUS_VAR_TERM_COLUMNS, + CXG_OBS_TERM_COLUMNS, + CXG_SCHEMA_VERSION, + X_LAYERS, + TileDB_Ctx, +) +from .mp import create_process_pool_executor +from .util import uricat + + +@dataclass +class EbInfo: + """Class used to collect information about axis (for validation code)""" + + n_obs: int = 0 + vars: set[str] = dataclasses.field(default_factory=set) + dataset_ids: set[str] = dataclasses.field(default_factory=set) + + def update(self: "EbInfo", b: "EbInfo") -> "EbInfo": + self.n_obs += b.n_obs + self.vars |= b.vars + self.dataset_ids |= b.dataset_ids + return self + + +def validate_all_soma_objects_exist(soma_path: str, experiment_builders: List[ExperimentBuilder]) -> bool: + """ + Validate all objects present and contain expected metadata. 
+ + soma_path + +-- census_info + | +-- summary: soma.DataFrame + | +-- datasets: soma.DataFrame + | +-- summary_cell_counts: soma.DataFrame + +-- census_data + | +-- homo_sapiens: soma.Experiment + | +-- mus_musculus: soma.Experiment + """ + census = soma.Collection(soma_path, ctx=TileDB_Ctx()) + assert census.exists() and census.soma_type == "SOMACollection" + assert "cxg_schema_version" in census.metadata and census.metadata["cxg_schema_version"] == CXG_SCHEMA_VERSION + assert ( + "census_schema_version" in census.metadata and census.metadata["census_schema_version"] == CENSUS_SCHEMA_VERSION + ) + assert "created_on" in census.metadata and datetime.fromisoformat(census.metadata["created_on"]) + + for name in ["census_info", "census_data"]: + assert name in census + assert census[name].soma_type == "SOMACollection" + assert census[name].exists() + + census_info = census["census_info"] + for name in [CENSUS_DATASETS_NAME, CENSUS_SUMMARY_NAME, CENSUS_SUMMARY_CELL_COUNTS_NAME]: + assert name in census_info, f"`{name}` missing from census_info" + assert census_info[name].soma_type == "SOMADataFrame" + assert census_info[name].exists() + + assert sorted(census_info[CENSUS_DATASETS_NAME].keys()) == sorted(CENSUS_DATASETS_COLUMNS + ["soma_joinid"]) + assert sorted(census_info[CENSUS_SUMMARY_CELL_COUNTS_NAME].keys()) == sorted( + list(CENSUS_SUMMARY_CELL_COUNTS_COLUMNS) + ["soma_joinid"] + ) + assert sorted(census_info[CENSUS_SUMMARY_NAME].keys()) == sorted(["label", "value", "soma_joinid"]) + + # there should be an experiment for each builder + census_data = census["census_data"] + for eb in experiment_builders: + assert ( + eb.name in census_data + and census_data[eb.name].exists() + and census_data[eb.name].soma_type == "SOMAExperiment" + ) + + e = census_data[eb.name] + assert "obs" in e and e.obs.exists() and e.obs.soma_type == "SOMADataFrame" + assert "ms" in e and e.ms.exists() and e.ms.soma_type == "SOMACollection" + + # there should be a single measurement called 'RNA' + assert "RNA" in e.ms and e.ms["RNA"].exists() and e.ms["RNA"].soma_type == "SOMAMeasurement" + + # The measurement should contain all X layers where n_obs > 0 (existence checked elsewhere) + rna = e.ms["RNA"] + assert "var" in rna and rna["var"].exists() and rna["var"].soma_type == "SOMADataFrame" + assert "X" in rna and rna["X"].exists() and rna["X"].soma_type == "SOMACollection" + for lyr in X_LAYERS: + # layers only exist if there are cells in the measurement + if lyr in rna.X: + assert rna.X[lyr].exists() and rna.X[lyr].soma_type == "SOMASparseNdArray" + + # and a presence matrix + assert "varp" in rna and rna["varp"].exists() and rna["varp"].soma_type == "SOMACollection" + # dataset presence only exists if there are cells in the measurement + if "dataset_presence_matrix" in rna.varp: + assert rna.varp["dataset_presence_matrix"].exists() + assert rna.varp["dataset_presence_matrix"].soma_type == "SOMASparseNdArray" + + return True + + +def _validate_axis_dataframes(args: Tuple[str, str, Dataset, List[ExperimentBuilder]]) -> Dict[str, EbInfo]: + assets_path, soma_path, dataset, experiment_builders = args + census = soma.Collection(soma_path, ctx=TileDB_Ctx()) + census_data = census["census_data"] + dataset_id = dataset.dataset_id + _, unfiltered_ad = next(open_anndata(assets_path, [dataset], backed="r")) + eb_info: Dict[str, EbInfo] = {} + for eb in experiment_builders: + eb_info[eb.name] = EbInfo() + anndata_cell_filter = make_anndata_cell_filter(eb.anndata_cell_filter_spec) + se = census_data[eb.name] + ad = 
anndata_cell_filter(unfiltered_ad, retain_X=False)
+        dataset_obs = (
+            se.obs.read_as_pandas_all(
+                column_names=list(CENSUS_OBS_TERM_COLUMNS),
+                value_filter=f"dataset_id == '{dataset_id}'",
+            )
+            .drop(columns=["dataset_id", "tissue_general", "tissue_general_ontology_term_id"])
+            .sort_values(by="soma_joinid")
+            .drop(columns=["soma_joinid"])
+            .reset_index(drop=True)
+        )
+
+        assert len(dataset_obs) == len(ad.obs), f"{dataset.dataset_id}/{eb.name} obs length mismatch"
+        if ad.n_obs > 0:
+            eb_info[eb.name].n_obs += ad.n_obs
+            eb_info[eb.name].dataset_ids.add(dataset_id)
+            eb_info[eb.name].vars |= set(ad.var.index.array)
+            ad_obs = ad.obs[list(CXG_OBS_TERM_COLUMNS)].reset_index(drop=True)
+            assert (dataset_obs == ad_obs).all().all(), f"{dataset.dataset_id}/{eb.name} obs content mismatch"
+
+    return eb_info
+
+
+def validate_axis_dataframes(
+    assets_path: str,
+    soma_path: str,
+    datasets: List[Dataset],
+    experiment_builders: List[ExperimentBuilder],
+    args: argparse.Namespace,
+) -> bool:
+    """
+    Validate axis dataframes: schema, shape, contents
+
+    Raises on error. Returns True on success.
+    """
+    logging.debug("validate_axis_dataframes")
+    census = soma.Collection(soma_path, ctx=TileDB_Ctx())
+    census_data = census["census_data"]
+
+    # check schema
+    expected_obs_columns = CENSUS_OBS_TERM_COLUMNS
+    expected_var_columns = CENSUS_VAR_TERM_COLUMNS
+    for eb in experiment_builders:
+        obs = census_data[eb.name].obs
+        var = census_data[eb.name].ms["RNA"].var
+        assert sorted(obs.keys()) == sorted(expected_obs_columns.keys())
+        assert sorted(var.keys()) == sorted(expected_var_columns.keys())
+        for field in obs.schema:
+            assert field.name in expected_obs_columns
+            assert field.type == expected_obs_columns[field.name], f"Unexpected type in {field.name}: {field.type}"
+        for field in var.schema:
+            assert field.name in expected_var_columns
+            assert field.type == expected_var_columns[field.name], f"Unexpected type in {field.name}: {field.type}"
+
+    # check shapes & perform weak test of contents
+    eb_info = {eb.name: EbInfo() for eb in experiment_builders}
+    if args.multi_process:
+        with create_process_pool_executor(args) as ppe:
+            futures = [
+                ppe.submit(_validate_axis_dataframes, (assets_path, soma_path, dataset, experiment_builders))
+                for dataset in datasets
+            ]
+            for n, future in enumerate(concurrent.futures.as_completed(futures), start=1):
+                res = future.result()
+                for eb_name, ebi in res.items():
+                    eb_info[eb_name].update(ebi)
+                logging.info(f"validate_axis {n} of {len(datasets)} complete.")
+    else:
+        for n, dataset in enumerate(datasets, start=1):
+            for eb_name, ebi in _validate_axis_dataframes(
+                (assets_path, soma_path, dataset, experiment_builders)
+            ).items():
+                eb_info[eb_name].update(ebi)
+            logging.info(f"validate_axis {n} of {len(datasets)} complete.")
+
+    for eb in experiment_builders:
+        se = census_data[eb.name]
+        n_vars = len(eb_info[eb.name].vars)
+
+        census_obs_df = se.obs.read_as_pandas_all(column_names=["soma_joinid", "dataset_id"])
+        assert eb_info[eb.name].n_obs == len(census_obs_df)
+        assert (len(census_obs_df) == 0) or (census_obs_df.soma_joinid.max() + 1 == eb_info[eb.name].n_obs)
+        assert eb_info[eb.name].dataset_ids == set(census_obs_df.dataset_id.unique())
+
+        census_var_df = se.ms["RNA"].var.read_as_pandas_all(column_names=["feature_id", "soma_joinid"])
+        assert n_vars == len(census_var_df)
+        assert eb_info[eb.name].vars == set(census_var_df.feature_id.array)
+        assert (len(census_var_df) == 0) or (census_var_df.soma_joinid.max() + 1 == n_vars)
+
+    return True
+
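For orientation, a minimal illustration of the EbInfo merge used above, as it happens when per-dataset results are accumulated (including across worker processes in the multi_process path). EbInfo is the dataclass defined at the top of this file; the values are made up:

a = EbInfo(n_obs=100, vars={"ENSG00000000003"}, dataset_ids={"dataset-1"})
b = EbInfo(n_obs=50, vars={"ENSG00000000003", "ENSG00000000005"}, dataset_ids={"dataset-2"})
a.update(b)
assert a.n_obs == 150                                      # obs counts sum
assert a.vars == {"ENSG00000000003", "ENSG00000000005"}    # var sets union
assert a.dataset_ids == {"dataset-1", "dataset-2"}         # dataset ids union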
+def _validate_X_layers_contents(args: Tuple[str, str, Dataset, List[ExperimentBuilder]]) -> bool:
+    """
+    Validate that a single dataset is correctly represented in the census.
+    Intended to be dispatched from validate_X_layers.
+
+    Currently implements a weak test: that nnz is correct.
+    """
+    assets_path, soma_path, dataset, experiment_builders = args
+    census = soma.Collection(soma_path, ctx=TileDB_Ctx())
+    census_data = census["census_data"]
+    _, unfiltered_ad = next(open_anndata(assets_path, [dataset]))
+    for eb in experiment_builders:
+        se = census_data[eb.name]
+        anndata_cell_filter = make_anndata_cell_filter(eb.anndata_cell_filter_spec)
+        ad = anndata_cell_filter(unfiltered_ad, retain_X=True)
+
+        soma_joinids: npt.NDArray[np.int64] = se.obs.read_as_pandas_all(
+            column_names=["soma_joinid", "dataset_id"], value_filter=f"dataset_id == '{dataset.dataset_id}'"
+        ).soma_joinid.to_numpy()
+
+        raw_nnz = 0
+        if len(soma_joinids) > 0:
+            assert "raw" in se.ms["RNA"].X and se.ms["RNA"].X["raw"].exists()
+
+            def count_elements(arr: soma.SparseNdArray, join_ids: npt.NDArray[np.int64]) -> int:
+                # TODO XXX: Work-around for regression TileDB-SOMA#473
+                # return sum(t.non_zero_length for t in arr.read_sparse_tensor((join_ids, slice(None))))
+                return sum(t.non_zero_length for t in arr.read_sparse_tensor((pa.array(join_ids), slice(None))))
+
+            raw_nnz = count_elements(se.ms["RNA"].X["raw"], soma_joinids)
+
+        def nnz(arr: Union[sparse.spmatrix, npt.NDArray[Any]]) -> int:
+            if isinstance(arr, (sparse.spmatrix, sparse.coo_array, sparse.csr_array, sparse.csc_array)):
+                return cast(int, arr.nnz)
+            return np.count_nonzero(arr)
+
+        if ad.raw is None:
+            assert raw_nnz == nnz(ad.X), f"{eb.name}:{dataset.dataset_id} 'raw' nnz mismatch {raw_nnz} vs {nnz(ad.X)}"
+        else:
+            assert raw_nnz == nnz(
+                ad.raw.X
+            ), f"{eb.name}:{dataset.dataset_id} 'raw' nnz mismatch {raw_nnz} vs {nnz(ad.raw.X)}"
+
+    return True
+
+
+def validate_X_layers(
+    assets_path: str,
+    soma_path: str,
+    datasets: List[Dataset],
+    experiment_builders: List[ExperimentBuilder],
+    args: argparse.Namespace,
+) -> bool:
+    """
+    Validate all X layers: schema, shape, contents
+
+    Raises on error. Returns True on success.
+ """ + logging.debug("validate_X_layers") + census = soma.Collection(soma_path, ctx=TileDB_Ctx()) + census_data = census["census_data"] + + for eb in experiment_builders: + se = census_data[eb.name] + assert se.ms["RNA"].X.exists() + + census_obs_df = se.obs.read_as_pandas_all(column_names=["soma_joinid"]) + n_obs = len(census_obs_df) + census_var_df = se.ms["RNA"].var.read_as_pandas_all(column_names=["feature_id", "soma_joinid"]) + n_vars = len(census_var_df) + + if n_obs > 0: + for lyr in X_LAYERS: + assert se.ms["RNA"].X[lyr].exists() + X = se.ms["RNA"].X[lyr] + assert X.schema.field("soma_dim_0").type == pa.int64() + assert X.schema.field("soma_dim_1").type == pa.int64() + assert X.schema.field("soma_data").type == pa.float32() + assert X.shape == (n_obs, n_vars) + + if args.multi_process: + with create_process_pool_executor(args) as ppe: + futures = [ + ppe.submit(_validate_X_layers_contents, (assets_path, soma_path, dataset, experiment_builders)) + for dataset in datasets + ] + for n, future in enumerate(concurrent.futures.as_completed(futures), start=1): + assert future.result() + logging.info(f"validate_X {n} of {len(datasets)} complete.") + else: + for n, vld in enumerate( + ( + _validate_X_layers_contents((assets_path, soma_path, dataset, experiment_builders)) + for dataset in datasets + ), + start=1, + ): + logging.info(f"validate_X {n} of {len(datasets)} complete.") + assert vld + + return True + + +def load_datasets_from_census(assets_path: str, soma_path: str) -> List[Dataset]: + # Datasets are pulled from the census datasets manifest, validating the SOMA + # census against the snapshot assets. + df = soma.Collection(soma_path)["census_info"][CENSUS_DATASETS_NAME].read_as_pandas_all() + df.drop(columns=["soma_joinid"], inplace=True) + df["corpora_asset_h5ad_uri"] = df.dataset_h5ad_path.map(lambda p: uricat(assets_path, p)) + datasets = Dataset.from_dataframe(df) + return datasets + + +def validate_manifest_contents(assets_path: str, datasets: List[Dataset]) -> bool: + """Confirm contents of manifest are correct.""" + for d in datasets: + p = pathlib.Path(uricat(assets_path, d.dataset_h5ad_path)) + assert p.exists() and p.is_file(), f"{d.dataset_h5ad_path} is missing from the census" + assert str(p).endswith(".h5ad"), "Expected only H5AD assets" + + return True + + +def validate(args: argparse.Namespace, experiment_builders: List[ExperimentBuilder]) -> bool: + """ + Validate that the "census" matches the datasets and experiment builder spec. + + Will raise if validation fails. Returns True on success. 
+ """ + logging.info("Validation start") + + base_path = uricat(args.uri, args.build_tag) + soma_path = uricat(base_path, "soma") + assets_path = uricat(base_path, "h5ads") + + assert os.path.exists(soma_path) and os.path.exists(assets_path) + + assert validate_all_soma_objects_exist(soma_path, experiment_builders) + + datasets = load_datasets_from_census(assets_path, soma_path) + assert validate_manifest_contents(assets_path, datasets) + + assert validate_axis_dataframes(assets_path, soma_path, datasets, experiment_builders, args) + assert validate_X_layers(assets_path, soma_path, datasets, experiment_builders, args) + logging.info("Validation success") + return True diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..5bc9af63d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,14 @@ +[tool.black] +line-length = 120 +target_version = ['py39'] + +[tool.isort] +profile="black" +line_length = 120 + +[tool.mypy] +show_error_codes = true +ignore_missing_imports = true +warn_unreachable = true +strict = true +plugins = "numpy.typing.mypy_plugin"