diff --git a/.coveragerc b/.coveragerc index 4edd7b1a2..0a4062294 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,2 +1,7 @@ [run] relative_files = True + +[report] +omit = + # torch-generated JIT files + _remote_module_non_scriptable.py diff --git a/api/python/cellxgene_census/pyproject.toml b/api/python/cellxgene_census/pyproject.toml index f40a0273a..e5516967c 100644 --- a/api/python/cellxgene_census/pyproject.toml +++ b/api/python/cellxgene_census/pyproject.toml @@ -31,7 +31,7 @@ dependencies= [ # NOTE: the tiledbsoma version must be >= to the version used in the Census builder, to # ensure that the assets are readable (tiledbsoma supports backward compatible reading). # Make sure this version does not fall behind the builder's tiledbsoma version. - "tiledbsoma~=1.6.0", + "tiledbsoma~=1.7.0", "anndata", "numpy>=1.21,<1.25", # numpy is constrained by numba and the old pip solver "requests", diff --git a/api/r/cellxgene.census/DESCRIPTION b/api/r/cellxgene.census/DESCRIPTION index a2496d228..4877f01d7 100644 --- a/api/r/cellxgene.census/DESCRIPTION +++ b/api/r/cellxgene.census/DESCRIPTION @@ -1,6 +1,6 @@ Package: cellxgene.census Title: CZ CELLxGENE Discover Cell Census -Version: 1.9.1 +Version: 1.10.0 Authors@R: person("Chan Zuckerberg Initiative Foundation", email = "soma@chanzuckerberg.com", role = c("aut", "cre", "cph", "fnd")) diff --git a/api/r/cellxgene.census/_pkgdown.yml b/api/r/cellxgene.census/_pkgdown.yml index 30c338890..3a07ebe31 100644 --- a/api/r/cellxgene.census/_pkgdown.yml +++ b/api/r/cellxgene.census/_pkgdown.yml @@ -41,6 +41,10 @@ reference: contents: - get_census_version_description - get_census_version_directory +- title: Census mirrors + contents: + - get_census_mirror + - get_census_mirror_directory articles: - title: Embeddings diff --git a/api/r/cellxgene.census/docs/404.html b/api/r/cellxgene.census/docs/404.html index e0cc8d957..4bd960f5f 100644 --- a/api/r/cellxgene.census/docs/404.html +++ b/api/r/cellxgene.census/docs/404.html @@ 
-30,7 +30,7 @@ cellxgene.census - 1.9.1 + 1.10.0 + + + + + +
+ + + + +
+
+ + + + +
+

Axis Query Example +

+

Goal: demonstrate basic axis metadata handling.

+

The CZ CELLxGENE Census stores obs (cell) metadata in a SOMA +DataFrame, which can be queried and read as an R data frame. The Census +also has a convenience package which simplifies opening the Census.

+

R data frames are in-memory objects. Take care that queries are small +enough for results to fit in memory.

+
+

Opening the census +

+

The cellxgene.census R package contains a convenient API +to open the latest version of the Census.

+
+census <- cellxgene.census::open_soma()
+

You can learn more about the cellxgene.census methods by accessing +their corresponding documentation. For example, see +?cellxgene.census::open_soma.

+
+
+

Summarize Census cell metadata +

+

Tips:

+
    +
  • You can read an entire SOMA dataframe into R using +as.data.frame(soma_df$read()).
  • +
  • Queries will be much faster if you request only the DataFrame +columns required for your analysis +(e.g. column_names = c("soma_joinid", "cell_type_ontology_term_id")).
  • +
  • You can also further refine query results by using a +value_filter, which will filter the Census for matching +records.
  • +
+
+

Summarize all cell types +

+

This example reads the cell metadata (obs) into an R data frame to +summarize in a variety of ways.

+
+human <- census$get("census_data")$get("homo_sapiens")
+
+# Read obs into an R data frame (tibble).
+obs_df <- as.data.frame(human$obs$read(
+  column_names = c("soma_joinid", "cell_type_ontology_term_id")
+))
+
+# Find all unique values in the cell_type_ontology_term_id column.
+unique_cell_type_ontology_term_id <- unique(obs_df$cell_type_ontology_term_id)
+
+cat(paste(
+  "There are",
+  length(unique_cell_type_ontology_term_id),
+  "cell types in the Census! The first few are:"
+))
+#> There are 604 cell types in the Census! The first few are:
+head(unique_cell_type_ontology_term_id)
+#> [1] "CL:0000540" "CL:0000738" "CL:0000763" "CL:0000136" "CL:0000235"
+#> [6] "CL:0000115"
+
+
+

Summarize a subset of cell types, selected with a +value_filter +

+

This example utilizes a SOMA “value filter” to read the subset of +cells with tissue_ontology_term_id equal to +UBERON:0002048 (lung tissue), and summarizes the query +result.

+
+# Read cell_type terms for cells which have a specific tissue term
+LUNG_TISSUE <- "UBERON:0002048"
+
+obs_df <- as.data.frame(human$obs$read(
+  column_names = c("cell_type_ontology_term_id"),
+  value_filter = paste("tissue_ontology_term_id == '", LUNG_TISSUE, "'", sep = "")
+))
+
+# Find all unique values in the cell_type_ontology_term_id column of the R data frame.
+unique_cell_type_ontology_term_id <- unique(obs_df$cell_type_ontology_term_id)
+cat(paste(
+  "There are ",
+  length(unique_cell_type_ontology_term_id),
+  " cell types in the Census where tissue_ontology_term_id == ",
+  LUNG_TISSUE,
+  "!\nThe first few are:",
+  sep = ""
+))
+#> There are 185 cell types in the Census where tissue_ontology_term_id == UBERON:0002048!
+#> The first few are:
+head(unique_cell_type_ontology_term_id)
+#> [1] "CL:0000003" "CL:4028004" "CL:0002145" "CL:0000625" "CL:0000624"
+#> [6] "CL:4028006"
+
+# Report the 10 most common
+top_10 <- sort(table(obs_df$cell_type_ontology_term_id), decreasing = TRUE)[1:10]
+cat(paste("The top 10 cell types where tissue_ontology_term_id ==", LUNG_TISSUE))
+#> The top 10 cell types where tissue_ontology_term_id == UBERON:0002048
+print(top_10)
+#> 
+#> CL:0000003 CL:0000583 CL:0000625 CL:0000624 CL:0000235 CL:0002063 CL:0000860 
+#>     562038     526859     323433     323067     254173     246279     203526 
+#> CL:0000623 CL:0001064 CL:0002632 
+#>     164944     149067     132243
+

You can also define much more complex value filters. For example:

+
    +
  • combine terms with and and or +
  • +
  • use the %in% operator to query on multiple values
  • +
+
+# You can also do more complex queries, such as testing for inclusion in a list of values
+obs_df <- as.data.frame(human$obs$read(
+  column_names = c("cell_type_ontology_term_id"),
+  value_filter = "tissue_ontology_term_id %in% c('UBERON:0002082', 'UBERON:0002084', 'UBERON:0002080')"
+))
+
+# Summarize
+top_10 <- sort(table(obs_df$cell_type_ontology_term_id), decreasing = TRUE)[1:10]
+print(top_10)
+#> 
+#> CL:0000746 CL:0008034 CL:0002548 CL:0000115 CL:0002131 CL:0000763 CL:0000669 
+#>     159096      84750      79618      64190      61830      32088      27515 
+#> CL:0000003 CL:0000057 CL:0002144 
+#>      22707      20117      18593
+
+
+

Full census stats +

+

This example queries all organisms in the Census, and summarizes the +diversity of various metadata labels.

+
+cols_to_query <- c(
+  "cell_type_ontology_term_id",
+  "assay_ontology_term_id",
+  "tissue_ontology_term_id"
+)
+
+total_cells <- 0
+for (organism in census$get("census_data")$names()) {
+  print(organism)
+  obs_df <- as.data.frame(
+    census$get("census_data")$get(organism)$obs$read(column_names = cols_to_query)
+  )
+  total_cells <- total_cells + nrow(obs_df)
+  for (col in cols_to_query) {
+    cat(paste("  Unique ", col, " values: ", length(unique(obs_df[[col]])), "\n", sep = ""))
+  }
+}
+#> [1] "homo_sapiens"
+#>   Unique cell_type_ontology_term_id values: 604
+#>   Unique assay_ontology_term_id values: 20
+#>   Unique tissue_ontology_term_id values: 227
+#> [1] "mus_musculus"
+#>   Unique cell_type_ontology_term_id values: 226
+#>   Unique assay_ontology_term_id values: 9
+#>   Unique tissue_ontology_term_id values: 51
+cat(paste("Complete Census contains", total_cells, "cells."))
+#> Complete Census contains 60361716 cells.
+
+
+
+
+
+ + + +
+ + + +
+
+ + + + + + + diff --git a/api/r/cellxgene.census/docs/articles/census_citation_generation.html b/api/r/cellxgene.census/docs/articles/census_citation_generation.html index 247eef148..c41c861df 100644 --- a/api/r/cellxgene.census/docs/articles/census_citation_generation.html +++ b/api/r/cellxgene.census/docs/articles/census_citation_generation.html @@ -32,7 +32,7 @@ cellxgene.census - 1.9.1 + 1.10.0
+ + + + + +
+
+
+ +
+

Get locator information about a Census mirror

+
+ +
+

Usage

+
get_census_mirror(mirror)
+
+ +
+

Arguments

+
mirror
+

Name of the mirror.

+ +
+
+

Value

+ + +

List with mirror information

+
+ +
+

Examples

+
get_census_mirror("AWS-S3-us-west-2")
+#> $provider
+#> [1] "S3"
+#> 
+#> $base_uri
+#> [1] "s3://cellxgene-census-public-us-west-2/"
+#> 
+#> $region
+#> [1] "us-west-2"
+#> 
+#> $alias
+#> [1] ""
+#> 
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/api/r/cellxgene.census/docs/reference/get_census_mirror_directory.html b/api/r/cellxgene.census/docs/reference/get_census_mirror_directory.html new file mode 100644 index 000000000..5630440b2 --- /dev/null +++ b/api/r/cellxgene.census/docs/reference/get_census_mirror_directory.html @@ -0,0 +1,137 @@ + +Get the directory of Census mirrors currently available — get_census_mirror_directory • cellxgene.census + Skip to contents + + +
+
+
+ +
+

Get the directory of Census mirrors currently available

+
+ +
+

Usage

+
get_census_mirror_directory()
+
+ +
+

Value

+ + +

Nested list with information about available mirrors

+
+ +
+

Examples

+
get_census_mirror_directory()
+#> $default
+#> $default$provider
+#> [1] "S3"
+#> 
+#> $default$base_uri
+#> [1] "s3://cellxgene-census-public-us-west-2/"
+#> 
+#> $default$region
+#> [1] "us-west-2"
+#> 
+#> $default$alias
+#> [1] "default"
+#> 
+#> 
+#> $`AWS-S3-us-west-2`
+#> $`AWS-S3-us-west-2`$provider
+#> [1] "S3"
+#> 
+#> $`AWS-S3-us-west-2`$base_uri
+#> [1] "s3://cellxgene-census-public-us-west-2/"
+#> 
+#> $`AWS-S3-us-west-2`$region
+#> [1] "us-west-2"
+#> 
+#> $`AWS-S3-us-west-2`$alias
+#> [1] ""
+#> 
+#> 
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/api/r/cellxgene.census/docs/reference/get_census_version_description.html b/api/r/cellxgene.census/docs/reference/get_census_version_description.html index d06f4fdd2..6deff837f 100644 --- a/api/r/cellxgene.census/docs/reference/get_census_version_description.html +++ b/api/r/cellxgene.census/docs/reference/get_census_version_description.html @@ -10,7 +10,7 @@ cellxgene.census - 1.9.1 + 1.10.0