census_counts <- as.data.frame(census$get("census_info")$get("summary_cell_counts")$read()$concat())
head(census_counts)
-#> # A tibble: 6 × 7
-#> soma_joinid organism category ontology_term_id unique_cell_count total_cell_count
-#> <int> <chr> <chr> <chr> <int> <int>
-#> 1 0 Homo sapiens all na 33364242 56400873
-#> 2 1 Homo sapiens assay EFO:0008722 264166 279635
-#> 3 2 Homo sapiens assay EFO:0008780 25652 51304
-#> 4 3 Homo sapiens assay EFO:0008919 89477 206754
-#> 5 4 Homo sapiens assay EFO:0008931 78750 188248
-#> 6 5 Homo sapiens assay EFO:0008953 4693 9386
-#> # ℹ 1 more variable: label <chr>
+#> soma_joinid organism category ontology_term_id unique_cell_count total_cell_count
+#> 1 0 Homo sapiens all na 33364242 56400873
+#> 2 1 Homo sapiens assay EFO:0008722 264166 279635
+#> 3 2 Homo sapiens assay EFO:0008780 25652 51304
+#> 4 3 Homo sapiens assay EFO:0008919 89477 206754
+#> 5 4 Homo sapiens assay EFO:0008931 78750 188248
+#> 6 5 Homo sapiens assay EFO:0008953 4693 9386
+#> label
+#> 1 na
+#> 2 Drop-seq
+#> 3 inDrop
+#> 4 Seq-Well
+#> 5 Smart-seq2
+#> 6 STRT-seq
For each combination of `organism` and values for each `category` of cell metadata, you can take a look at `total_cell_count` and `unique_cell_count` for the cell counts of that combination.
The values for each `category` are specified in `ontology_term_id` and `label`, which are the value’s IDs and labels, respectively.
@@ -288,12 +290,12 @@
Example: number of mic
If you have a specific term from any of the categories shown above, you can directly find out the number of cells for that term.
census_counts[census_counts$label == "microglial cell", ]
-#> # A tibble: 2 × 7
-#> soma_joinid organism category ontology_term_id unique_cell_count total_cell_count
-#> <int> <chr> <chr> <chr> <int> <int>
-#> 1 69 Homo sapiens cell_type CL:0000129 268114 370771
-#> 2 1038 Mus musculus cell_type CL:0000129 48998 62617
-#> # ℹ 1 more variable: label <chr>
+#> soma_joinid organism category ontology_term_id unique_cell_count
+#> 70 69 Homo sapiens cell_type CL:0000129 268114
+#> 1039 1038 Mus musculus cell_type CL:0000129 48998
+#> total_cell_count label
+#> 70 370771 microglial cell
+#> 1039 62617 microglial cell
@@ -316,20 +318,136 @@ Example: all cell types avail
obs_df <- census$get("census_data")$get("homo_sapiens")$obs$read(column_names = c("cell_type", "is_primary_data"))
as.data.frame(obs_df$concat())
-#> # A tibble: 56,400,873 × 2
-#> cell_type is_primary_data
-#> <chr> <lgl>
-#> 1 syncytiotrophoblast cell FALSE
-#> 2 placental villous trophoblast FALSE
-#> 3 syncytiotrophoblast cell FALSE
-#> 4 syncytiotrophoblast cell FALSE
-#> 5 extravillous trophoblast FALSE
-#> 6 placental villous trophoblast FALSE
-#> 7 syncytiotrophoblast cell FALSE
-#> 8 extravillous trophoblast FALSE
-#> 9 placental villous trophoblast FALSE
-#> 10 syncytiotrophoblast cell FALSE
-#> # ℹ 56,400,863 more rows
+#> cell_type is_primary_data
+#> 1 syncytiotrophoblast cell FALSE
+#> 2 placental villous trophoblast FALSE
+#> 3 syncytiotrophoblast cell FALSE
+#> 4 syncytiotrophoblast cell FALSE
+#> 5 extravillous trophoblast FALSE
+#> 6 placental villous trophoblast FALSE
+#> 7 syncytiotrophoblast cell FALSE
+#> 8 extravillous trophoblast FALSE
+#> 9 placental villous trophoblast FALSE
+#> 10 syncytiotrophoblast cell FALSE
+#> 11 syncytiotrophoblast cell FALSE
+#> 12 syncytiotrophoblast cell FALSE
+#> 13 placental villous trophoblast FALSE
+#> 14 placental villous trophoblast FALSE
+#> 15 syncytiotrophoblast cell FALSE
+#> 16 syncytiotrophoblast cell FALSE
+#> 17 syncytiotrophoblast cell FALSE
+#> 18 syncytiotrophoblast cell FALSE
+#> 19 syncytiotrophoblast cell FALSE
+#> 20 extravillous trophoblast FALSE
+#> 21 syncytiotrophoblast cell FALSE
+#> 22 syncytiotrophoblast cell FALSE
+#> 23 placental villous trophoblast FALSE
+#> 24 syncytiotrophoblast cell FALSE
+#> 25 syncytiotrophoblast cell FALSE
+#> 26 syncytiotrophoblast cell FALSE
+#> 27 syncytiotrophoblast cell FALSE
+#> 28 syncytiotrophoblast cell FALSE
+#> 29 syncytiotrophoblast cell FALSE
+#> 30 syncytiotrophoblast cell FALSE
+#> 31 placental villous trophoblast FALSE
+#> 32 trophoblast giant cell FALSE
+#> 33 syncytiotrophoblast cell FALSE
+#> 34 placental villous trophoblast FALSE
+#> 35 syncytiotrophoblast cell FALSE
+#> 36 extravillous trophoblast FALSE
+#> 37 placental villous trophoblast FALSE
+#> 38 placental villous trophoblast FALSE
+#> 39 syncytiotrophoblast cell FALSE
+#> 40 syncytiotrophoblast cell FALSE
+#> 41 syncytiotrophoblast cell FALSE
+#> 42 syncytiotrophoblast cell FALSE
+#> 43 syncytiotrophoblast cell FALSE
+#> 44 placental villous trophoblast FALSE
+#> 45 placental villous trophoblast FALSE
+#> 46 syncytiotrophoblast cell FALSE
+#> 47 placental villous trophoblast FALSE
+#> 48 syncytiotrophoblast cell FALSE
+#> 49 syncytiotrophoblast cell FALSE
+#> 50 syncytiotrophoblast cell FALSE
+#> 51 syncytiotrophoblast cell FALSE
+#> 52 syncytiotrophoblast cell FALSE
+#> 53 syncytiotrophoblast cell FALSE
+#> 54 syncytiotrophoblast cell FALSE
+#> 55 placental villous trophoblast FALSE
+#> 56 placental villous trophoblast FALSE
+#> 57 placental villous trophoblast FALSE
+#> 58 syncytiotrophoblast cell FALSE
+#> 59 syncytiotrophoblast cell FALSE
+#> 60 extravillous trophoblast FALSE
+#> 61 placental villous trophoblast FALSE
+#> 62 extravillous trophoblast FALSE
+#> 63 syncytiotrophoblast cell FALSE
+#> 64 syncytiotrophoblast cell FALSE
+#> 65 syncytiotrophoblast cell FALSE
+#> 66 placental villous trophoblast FALSE
+#> 67 syncytiotrophoblast cell FALSE
+#> 68 placental villous trophoblast FALSE
+#> 69 syncytiotrophoblast cell FALSE
+#> 70 syncytiotrophoblast cell FALSE
+#> 71 syncytiotrophoblast cell FALSE
+#> 72 syncytiotrophoblast cell FALSE
+#> 73 placental villous trophoblast FALSE
+#> 74 placental villous trophoblast FALSE
+#> 75 placental villous trophoblast FALSE
+#> 76 placental villous trophoblast FALSE
+#> 77 syncytiotrophoblast cell FALSE
+#> 78 syncytiotrophoblast cell FALSE
+#> 79 syncytiotrophoblast cell FALSE
+#> 80 placental villous trophoblast FALSE
+#> 81 syncytiotrophoblast cell FALSE
+#> 82 syncytiotrophoblast cell FALSE
+#> 83 syncytiotrophoblast cell FALSE
+#> 84 placental villous trophoblast FALSE
+#> 85 extravillous trophoblast FALSE
+#> 86 syncytiotrophoblast cell FALSE
+#> 87 syncytiotrophoblast cell FALSE
+#> 88 placental villous trophoblast FALSE
+#> 89 placental villous trophoblast FALSE
+#> 90 syncytiotrophoblast cell FALSE
+#> 91 extravillous trophoblast FALSE
+#> 92 placental villous trophoblast FALSE
+#> 93 syncytiotrophoblast cell FALSE
+#> 94 syncytiotrophoblast cell FALSE
+#> 95 syncytiotrophoblast cell FALSE
+#> 96 extravillous trophoblast FALSE
+#> 97 syncytiotrophoblast cell FALSE
+#> 98 placental villous trophoblast FALSE
+#> 99 placental villous trophoblast FALSE
+#> 100 extravillous trophoblast FALSE
+#> 101 placental villous trophoblast FALSE
+#> 102 syncytiotrophoblast cell FALSE
+#> 103 syncytiotrophoblast cell FALSE
+#> 104 extravillous trophoblast FALSE
+#> 105 placental villous trophoblast FALSE
+#> 106 placental villous trophoblast FALSE
+#> 107 placental villous trophoblast FALSE
+#> 108 placental villous trophoblast FALSE
+#> 109 syncytiotrophoblast cell FALSE
+#> 110 placental villous trophoblast FALSE
+#> 111 placental villous trophoblast FALSE
+#> 112 extravillous trophoblast FALSE
+#> 113 extravillous trophoblast FALSE
+#> 114 syncytiotrophoblast cell FALSE
+#> 115 placental villous trophoblast FALSE
+#> 116 extravillous trophoblast FALSE
+#> 117 placental villous trophoblast FALSE
+#> 118 placental villous trophoblast FALSE
+#> 119 syncytiotrophoblast cell FALSE
+#> 120 syncytiotrophoblast cell FALSE
+#> 121 syncytiotrophoblast cell FALSE
+#> 122 syncytiotrophoblast cell FALSE
+#> 123 syncytiotrophoblast cell FALSE
+#> 124 placental villous trophoblast FALSE
+#> 125 placental villous trophoblast FALSE
+#> 126 placental villous trophoblast FALSE
+#> 127 extravillous trophoblast FALSE
+#> 128 extravillous trophoblast FALSE
+#> [ reached 'max' / getOption("max.print") -- omitted 56400745 rows ]
The number of rows is the total number of cells for humans. Now, if you wish to get the cell counts per cell type, you can work with this data frame.
In addition, we will only focus on cells that are marked with `is_primary_data=TRUE`, as this ensures we de-duplicate cells that appear more than once in CELLxGENE Discover.
diff --git a/api/r/cellxgene.census/docs/articles/comp_bio_data_integration.html b/api/r/cellxgene.census/docs/articles/comp_bio_data_integration.html
index 2a93a17e3..2f51223df 100644
--- a/api/r/cellxgene.census/docs/articles/comp_bio_data_integration.html
+++ b/api/r/cellxgene.census/docs/articles/comp_bio_data_integration.html
@@ -32,7 +32,7 @@
cellxgene.census
- 1.6.0
+ 1.7.0
+#> soma_joinid collection_id collection_name
+#> 14 525 0b9d8a04-bb9d-44da-aa27-705bb65b54eb Tabula Muris Senis
+#> 35 547 0b9d8a04-bb9d-44da-aa27-705bb65b54eb Tabula Muris Senis
+#> collection_doi dataset_id
+#> 14 10.1038/s41586-020-2496-1 4546e757-34d0-4d17-be06-538318925fcd
+#> 35 10.1038/s41586-020-2496-1 6202a243-b713-4e12-9ced-c387f8483dea
+#> dataset_title
+#> 14 Liver - A single-cell transcriptomic atlas characterizes ageing tissues in the mouse - Smart-seq2
+#> 35 Liver - A single-cell transcriptomic atlas characterizes ageing tissues in the mouse - 10x
+#> dataset_h5ad_path dataset_total_cell_count
+#> 14 4546e757-34d0-4d17-be06-538318925fcd.h5ad 2859
+#> 35 6202a243-b713-4e12-9ced-c387f8483dea.h5ad 7294
Now we can use the values from `dataset_id` to query and load a Seurat object with all the cells from those datasets.
tabula_muris_liver_ids <- c("4546e757-34d0-4d17-be06-538318925fcd", "6202a243-b713-4e12-9ced-c387f8483dea")
@@ -293,20 +298,20 @@ # Run the standard workflow for visualization and clustering
seurat_obj.combined <- RunPCA(seurat_obj.combined, npcs = 30, verbose = FALSE)
seurat_obj.combined <- RunUMAP(seurat_obj.combined, reduction = "pca", dims = 1:30)
-#> 13:30:46 UMAP embedding parameters a = 0.9922 b = 1.112
-#> 13:30:46 Read 10153 rows and found 30 numeric columns
-#> 13:30:46 Using Annoy for neighbor search, n_neighbors = 30
-#> 13:30:46 Building Annoy index with metric = cosine, n_trees = 50
+#> 10:26:29 UMAP embedding parameters a = 0.9922 b = 1.112
+#> 10:26:29 Read 10153 rows and found 30 numeric columns
+#> 10:26:29 Using Annoy for neighbor search, n_neighbors = 30
+#> 10:26:29 Building Annoy index with metric = cosine, n_trees = 50
#> 0% 10 20 30 40 50 60 70 80 90 100%
#> [----|----|----|----|----|----|----|----|----|----|
#> **************************************************|
-#> 13:30:47 Writing NN index file to temp file /var/folders/9q/j_1zpqns7vz_s5n9l6_b943r0000gr/T//RtmpwHKO6w/file16d2113ba6e8
-#> 13:30:47 Searching Annoy index using 1 thread, search_k = 3000
-#> 13:30:51 Annoy recall = 100%
-#> 13:30:51 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
-#> 13:30:52 Initializing from normalized Laplacian + noise (using irlba)
-#> 13:30:53 Commencing optimization for 200 epochs, with 410804 positive edges
-#> 13:30:59 Optimization finished
+#> 10:26:31 Writing NN index file to temp file /var/folders/9q/j_1zpqns7vz_s5n9l6_b943r0000gr/T//RtmpYMsCtV/file7cd6474698e7
+#> 10:26:31 Searching Annoy index using 1 thread, search_k = 3000
+#> 10:26:34 Annoy recall = 100%
+#> 10:26:35 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
+#> 10:26:36 Initializing from normalized Laplacian + noise (using irlba)
+#> 10:26:36 Commencing optimization for 200 epochs, with 410804 positive edges
+#> 10:26:42 Optimization finished
Plot the UMAP.
# By assay
diff --git a/api/r/cellxgene.census/docs/articles/comp_bio_normalizing_full_gene_sequencing.html b/api/r/cellxgene.census/docs/articles/comp_bio_normalizing_full_gene_sequencing.html
index c91232c4e..782e325c1 100644
--- a/api/r/cellxgene.census/docs/articles/comp_bio_normalizing_full_gene_sequencing.html
+++ b/api/r/cellxgene.census/docs/articles/comp_bio_normalizing_full_gene_sequencing.html
@@ -32,7 +32,7 @@
cellxgene.census
- 1.6.0
+ 1.7.0
+#> soma_joinid collection_id collection_name
+#> 1 525 0b9d8a04-bb9d-44da-aa27-705bb65b54eb Tabula Muris Senis
+#> collection_doi dataset_id
+#> 1 10.1038/s41586-020-2496-1 4546e757-34d0-4d17-be06-538318925fcd
+#> dataset_title
+#> 1 Liver - A single-cell transcriptomic atlas characterizes ageing tissues in the mouse - Smart-seq2
+#> dataset_h5ad_path dataset_total_cell_count
+#> 1 4546e757-34d0-4d17-be06-538318925fcd.h5ad 2859
Now we can use this id to fetch the data.
liver_dataset_id <- liver_dataset[1,"dataset_id"]
diff --git a/api/r/cellxgene.census/docs/articles/comp_bio_summarize_axis_query.html b/api/r/cellxgene.census/docs/articles/comp_bio_summarize_axis_query.html
index 6c4a6ddab..422b086eb 100644
--- a/api/r/cellxgene.census/docs/articles/comp_bio_summarize_axis_query.html
+++ b/api/r/cellxgene.census/docs/articles/comp_bio_summarize_axis_query.html
@@ -32,7 +32,7 @@
cellxgene.census
- 1.6.0
+ 1.7.0
diff --git a/api/r/cellxgene.census/docs/index.html b/api/r/cellxgene.census/docs/index.html
index f5a40068a..dc57749ee 100644
--- a/api/r/cellxgene.census/docs/index.html
+++ b/api/r/cellxgene.census/docs/index.html
@@ -36,7 +36,7 @@