-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdefine_clusters_de.Rmd
288 lines (221 loc) · 17.6 KB
/
define_clusters_de.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
---
title: "define_clusters_de.Rmd"
author: "Sahin Naqvi"
date: "10/30/2017"
output: html_document
---
Starting from a raw counts matrix, use Seurat to perform filtering, normalization, PCA, and clustering of single cells. Use tSNE for visualization
```{r}
library(Seurat)
library(dplyr)
library(Matrix)
rawdat = data.matrix(read.delim("alldat.noembryo.countmat.txt",header=TRUE,stringsAsFactors = FALSE,row.names = 1))
#filter for minimum numbers of genes/UMIs detected
pgc = CreateSeuratObject(raw.data = rawdat, min.cells = 20, min.genes = 2000, project = "PGC")
VlnPlot(object = pgc, features.plot = c("nGene", "nUMI"), nCol = 2)
pgc = FilterCells(object = pgc, subset.names = c("nGene","nUMI"), low.thresholds = c(2000,100000), high.thresholds = c(Inf,Inf))
#convert Run IDs to age-based IDs
new2old.ident = read.table("run_info.short.txt",header = TRUE,stringsAsFactors = FALSE,sep="\t")
pgc@ident <- plyr::mapvalues(x = pgc@ident, from = new2old.ident$Run_s, to = new2old.ident$developmental_stage_s)
[email protected]$orig.ident = plyr::mapvalues(x = [email protected]$orig.ident, from = new2old.ident$Run_s, to = new2old.ident$developmental_stage_s)
pgc <- StashIdent(pgc, save.name = "Embryo_Age")
[email protected]$tp.ident = gsub(" week","",[email protected]$orig.ident)
#normalize count matrix
pgc <- NormalizeData(object = pgc, normalization.method = "LogNormalize",scale.factor = 500000) #skip if starting with log-normalized TPM values
pgc <- FindVariableGenes(object = pgc, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 2, y.cutoff = 2,do.plot = TRUE)
length([email protected])
# Run PCA, choosing number of significant PCs via jackstraw analysis
pgc <- ScaleData(object = pgc, vars.to.regress = c("nUMI"))
pgc <- RunPCA(object = pgc, pc.genes = [email protected], do.print = FALSE, genes.print = 5,pcs.compute = 25)
pgc <- JackStraw(object = pgc, num.replicate = 200, do.print = FALSE,num.pc = 25)
JackStrawPlot(object = pgc, PCs = 1:25)
PCElbowPlot(pgc,num.pc = 25)
#define clusters and perform tSNE based on chosen PCs
pgc <- FindClusters(object = pgc, reduction.type = "pca", dims.use = 1:19, resolution = 1.2, print.output = 0, save.SNN = TRUE)
pgc <- RunTSNE(object = pgc, dims.use = 1:19, do.fast = TRUE)
plot1 = TSNEPlot(object=pgc,do.label = T,do.return=T)
plot2 = TSNEPlot(object=pgc,do.label = F,do.return=T,group.by="tp.ident")
multiplot(plot1,plot2,cols=2)
# sum the expression of Y-linked genes
ygenes = c("DAZ1", "DAZ2", "DAZ3", "DAZ4", "DDX3Y", "EIF1AY", "UTY", "KDM5D", "ZFY", "RPS4Y2", "RPS4Y1", "RBMY1A1", "RBMY1B", "RBMY1D", "RBMY1E", "RBMY1F", "RBMY1J")
pgc@data = rbind(pgc@data,apply(pgc@data[intersect(rownames(pgc@data),ygenes),],2,sum))
rownames(pgc@data)[nrow(pgc@data)] = "YSUM"
# change features.plot to plot expression patterns for a desired list of genes
plot3 = FeaturePlot(object = pgc, features.plot = c("WT1","NANOG","FIGLA","POU5F1"), cols.use = c("grey", "blue"), reduction.use = "tsne",do.return = T)
```
SCDE analysis using assigned clusters from Seurat - male pre- vs post-entry into genital ridge
```{r}
library(scde)
# define groups to compare based on cluster membership (cluster 5 = week 4/5 male and female germ cells, clusters 1,11,6 = male germ cells from later weeks)
clust_factors = subset([email protected],res.1.2 %in% c(5,11,1,6))[,c("res.1.2")]
clust_factors[which(clust_factors %in% c(5))] = 0
clust_factors[which(clust_factors %in% c(1,11,6))] = 1
clust_factors = factor(clust_factors)
names(clust_factors) = rownames(subset([email protected],res.1.2 %in% c(5,11,1,6)))
rawdat_forde = [email protected][,names(clust_factors)]
rawdat_forde<-apply(rawdat_forde,2,function(x) {storage.mode(x) <- 'integer'; x})
cd = clean.counts(rawdat_forde,min.lib.size=1000, min.reads = 1, min.detected = 1)
cd_tpm = t((t(cd)/apply(cd,2,sum))*100000)
ygenes = c("DAZ1","DAZ2","DAZ3","DAZ4","DDX3Y","EIF1AY","UTY","KDM5D","ZFY","RPS4Y2","RPS4Y1","RBMY1A1","RBMY1B","RBMY1D","RBMY1E","RBMY1F","RBMY1J")
# further refine cluster membership, excluding cells with low NANOG (not pluripotent) or any WT1 (somatic) expression
clust_factors = clust_factors[setdiff(names(clust_factors),names(cd_tpm["WT1",which(cd_tpm["WT1",] > 0)]))]
clust_factors = clust_factors[setdiff(names(clust_factors),names(cd_tpm["NANOG",which(cd_tpm["NANOG",] < 5)]))]
# only analyze male cells (this only serves to remove female cells from cluster 5)
ysum_tpm = apply(cd_tpm[intersect(rownames(cd_tpm),ygenes),],2,sum)
clust_factors_monly = clust_factors[names(ysum_tpm[which(ysum_tpm >= 10)])]
cd_monly = cd[,names(clust_factors_monly)]
o.ifm <- scde.error.models(counts = cd_monly, groups = clust_factors_monly, n.cores = 8, threshold.segmentation = TRUE, save.crossfit.plots = FALSE, save.model.plots = FALSE, verbose = 0,min.count.threshold = 1)
valid.cells <- o.ifm$corr.a > 0
group_factors_monly = clust_factors_monly[rownames(o.ifm)]
o.prior <- scde.expression.prior(models = o.ifm, counts = cd_monly, length.out = 400, show.plot = FALSE)
ediff <- scde.expression.difference(o.ifm, cd_monly, o.prior, groups = group_factors_monly, n.randomizations = 100, n.cores = 8, verbose = 1)
# use posterior estimates of cluster-wide expression to get a list of genes expressed > 1 TPM in human PGCs
jp <- scde.posteriors(models = o.ifm[names(clust_factors_monly[which(clust_factors_monly == 0)]), ], cd_monly, o.prior, n.cores = 8)
jp.mles = as.numeric(colnames(jp)[apply(jp, 1, which.max)])
names(jp.mles) = rownames(jp)
jp.1 <- scde.posteriors(models = o.ifm[names(clust_factors_monly[which(clust_factors_monly == 1)]), ], cd_monly, o.prior, n.cores = 8)
jp.1.mles = as.numeric(colnames(jp.1)[apply(jp.1, 1, which.max)])
names(jp.1.mles) = rownames(jp.1)
humanpgc_expr = names(jp.mles[which(jp.mles > 1)])
cd_monly_tpm = cd_tpm[,names(group_factors_monly[which(!is.na(group_factors_monly))])]
clust_1_11_6_fracexpr = apply(cd_monly_tpm[,names(group_factors_monly)[which(group_factors_monly==1)]] >1,1,sum)/length(which(group_factors_monly==1))
# get a list of protein-coding genes from the human annotation used
include_excep = c("GOLGA2P3Y","GOLGA2P2Y","CSPG4P4Y","CSPG4P3Y") #exceptions to include that are annotated as pseudogenes
embl <- read.table("gencode.v24.annotation.basic_ccds_nopar.gene_tx_annotable.txt", sep = "\t", header = TRUE,stringsAsFactors = FALSE)
embl = subset(embl, (transcript_type == 'protein_coding'))
gene_chr_pre = unique(embl[,c("gene_name","seqname")])
embl <- unique(embl$gene_name)
embl <- c(as.character(as.matrix(embl)),include_excep)
# subset the SCDE results to only include protein-coding genes
ediff_protcod = ediff[rownames(ediff) %in% embl,]
ediff_protcod$pval = 2*(pnorm(-abs(ediff_protcod$Z)))
ediff_protcod$p.adj = p.adjust(ediff_protcod$pval,method="BH")
```
SCDE analysis using assigned clusters from Seurat - female pre- vs post-entry into genital ridge
```{r}
# first split cells into groups corresponding to clusters 5 (male/female week 4/5 embryos) and cluster 2 (all female, later stage embryos)
clust_factors_fonly = subset([email protected],res.1.2 %in% c(2,5))[,c("res.1.2")]
clust_factors_fonly[which(clust_factors_fonly %in% c(5))] = 0
clust_factors_fonly[which(clust_factors_fonly %in% c(2))] = 1
clust_factors_fonly = factor(clust_factors_fonly)
names(clust_factors_fonly) = rownames(subset([email protected],res.1.2 %in% c(2,5)))
clust_factors_fonly = clust_factors_fonly[setdiff(names(clust_factors_fonly),names(clust_factors_monly))]
rawdat_forde_fonly = [email protected][,names(clust_factors_fonly)]
rawdat_forde_fonly<-apply(rawdat_forde_fonly,2,function(x) {storage.mode(x) <- 'integer'; x})
cd_fonly = clean.counts(rawdat_forde_fonly,min.lib.size=1000, min.reads = 1, min.detected = 1)
cd_fonly_tpm = t((t(cd_fonly)/apply(cd_fonly,2,sum))*100000)
ysum_fonly_tpm = apply(cd_fonly_tpm[intersect(rownames(cd_fonly_tpm),ygenes),],2,sum)
#filter out male and week 4 cells
clust_factors_fonly = clust_factors_fonly[names(ysum_fonly_tpm[which(ysum_fonly_tpm < 1)])]
clust_factors_fonly[which(!grepl("SRR4199308",names(clust_factors_fonly)))] = 1
#filter out NANOG low or WT1-positive cells
clust_factors_fonly = clust_factors_fonly[setdiff(names(clust_factors_fonly),names(cd_tpm["WT1",which(cd_tpm["WT1",] > 0)]))]
clust_factors_fonly = clust_factors_fonly[setdiff(names(clust_factors_fonly),names(cd_tpm["NANOG",which(cd_tpm["NANOG",] < 5)]))]
cd_fonly = cd_fonly[,names(clust_factors_fonly)]
cd_fonly_tpm = cd_fonly_tpm[,names(clust_factors_fonly)]
o.ifm.fonly <- scde.error.models(counts = cd_fonly, groups = clust_factors_fonly, n.cores = 8, threshold.segmentation = TRUE, save.crossfit.plots = FALSE, save.model.plots = FALSE, verbose = 0,min.count.threshold = 1)
valid.cells <- o.ifm.fonly$corr.a > 0
group_factors_fonly = clust_factors_fonly[rownames(o.ifm.fonly)]
o.prior.fonly <- scde.expression.prior(models = o.ifm.fonly, counts = cd_fonly, length.out = 400, show.plot = FALSE)
ediff.fonly <- scde.expression.difference(o.ifm.fonly, cd_fonly, o.prior.fonly, groups = group_factors_fonly, n.randomizations = 100, n.cores = 32, verbose = 1)
ediff.fonly[,1:6] = -ediff.fonly[,1:6]
ediff.fonly_protcod = ediff.fonly[rownames(ediff.fonly) %in% embl,]
ediff.fonly_protcod$pval = 2*(pnorm(-abs(ediff.fonly_protcod$Z)))
ediff.fonly_protcod$p.adj = p.adjust(ediff.fonly_protcod$pval,method="BH")
```
SCDE analysis using assigned clusters from Seurat - male early vs late gonadal
```{r}
# define groups to compare based on cluster membership (clusters 1,11,6 = male germ cells from later weeks but early gonadal from previous comparison, cluster 0 = late gonadal cells)
clust_factors_monly_gonadvlate = clust_factors_monly[which(clust_factors_monly==1)]
clust_factors_monly_gonadvlate[which(clust_factors_monly_gonadvlate == 1)] = 0
temp = subset([email protected],res.1.2 %in% c(0))[,c("res.1.2")]
names(temp) = rownames(subset([email protected],res.1.2 %in% c(0)))
temp[which(temp==0)] = 2
clust_factors_monly_gonadvlate = c(clust_factors_monly_gonadvlate,temp)
clust_factors_monly_gonadvlate[which(clust_factors_monly_gonadvlate == 1)] = 0
clust_factors_monly_gonadvlate[which(clust_factors_monly_gonadvlate %in% c(2))] = 1
rawdat_forde_monly = [email protected][,names(clust_factors_monly_gonadvlate)]
rawdat_forde_monly<-apply(rawdat_forde_monly,2,function(x) {storage.mode(x) <- 'integer'; x})
cd_monly = clean.counts(rawdat_forde_monly,min.lib.size=1000)
# run SCDE
o.ifm.monly <- scde.error.models(counts = cd_monly, groups = clust_factors_monly_gonadvlate, n.cores = 8, threshold.segmentation = TRUE, save.crossfit.plots = FALSE, save.model.plots = FALSE, verbose = 0,min.count.threshold = 1)
valid.cells <- o.ifm.monly$corr.a > 0
o.prior.monly <- scde.expression.prior(models = o.ifm.monly, counts = cd_monly, length.out = 400, show.plot = FALSE)
ediff.monly.gonadvlate <- scde.expression.difference(o.ifm.monly, cd_monly, o.prior.monly, groups = as.factor(clust_factors_monly_gonadvlate), n.randomizations = 100, n.cores = 8, verbose = 1)
ediff.monly.gonadvlate[,1:6] = -ediff.monly.gonadvlate[,1:6]
# estimate MLE TPMs in early and late gonadal groups
jp.xy.0.gonadvlate <- scde.posteriors(models = o.ifm.monly[names(clust_factors_monly_gonadvlate[which(clust_factors_monly_gonadvlate == 0)]), ], cd_monly, o.prior.monly, n.cores = 8)
jp.xy.0.mles.gonadvlate = as.numeric(colnames(jp.xy.0.gonadvlate)[apply(jp.xy.0.gonadvlate, 1, which.max)])
names(jp.xy.0.mles.gonadvlate) = rownames(jp.xy.0.gonadvlate)
jp.xy.1.gonadvlate <- scde.posteriors(models = o.ifm.monly[names(clust_factors_monly_gonadvlate[which(clust_factors_monly_gonadvlate == 1)]), ], cd_monly, o.prior.monly, n.cores = 8)
jp.xy.1.mles.gonadvlate = as.numeric(colnames(jp.xy.1.gonadvlate)[apply(jp.xy.1.gonadvlate, 1, which.max)])
names(jp.xy.1.mles.gonadvlate) = rownames(jp.xy.1.gonadvlate)
ediff.monly.gonadvlate_protcod = ediff.monly.gonadvlate[rownames(ediff.monly.gonadvlate) %in% embl,]
ediff.monly.gonadvlate_protcod$pval = 2*(pnorm(-abs(ediff.monly.gonadvlate_protcod$Z)))
ediff.monly.gonadvlate_protcod$p.adj = p.adjust(ediff.monly.gonadvlate_protcod$pval,method="BH")
```
SCDE analysis using assigned clusters from Seurat - female early vs late gonadal
```{r}
# define groups to compare cluster 2 (all female, later stage embryos but early gonadal from above comparison) to clusters 7,10,13 (late gonadal)
clust_factors_fonly_gonadvlate = clust_factors_fonly[which(clust_factors_fonly==1)]
clust_factors_fonly_gonadvlate[which(clust_factors_fonly_gonadvlate == 1)] = 0
temp = subset([email protected],res.1.2 %in% c(7,10,13))[,c("res.1.2")]
names(temp) = rownames(subset([email protected],res.1.2 %in% c(7,10,13)))
clust_factors_fonly_gonadvlate = c(clust_factors_fonly_gonadvlate,temp)
clust_factors_fonly_gonadvlate[which(clust_factors_fonly_gonadvlate == 1)] = 0
clust_factors_fonly_gonadvlate[which(clust_factors_fonly_gonadvlate %in% c(7,10,13))] = 1
rawdat_forde_fonly = [email protected][,names(clust_factors_fonly_gonadvlate)]
rawdat_forde_fonly<-apply(rawdat_forde_fonly,2,function(x) {storage.mode(x) <- 'integer'; x})
cd_fonly = clean.counts(rawdat_forde_fonly,min.lib.size=1000)
# run SCDE for differential expression
o.ifm.fonly <- scde.error.models(counts = cd_fonly, groups = clust_factors_fonly_gonadvlate, n.cores = 8, threshold.segmentation = TRUE, save.crossfit.plots = FALSE, save.model.plots = FALSE, verbose = 1,min.count.threshold = 1)
valid.cells <- o.ifm.fonly$corr.a > 0
group_factors_fonly_gonadvlate = clust_factors_fonly_gonadvlate[rownames(o.ifm.fonly)]
o.prior.fonly <- scde.expression.prior(models = o.ifm.fonly, counts = cd_fonly, length.out = 400, show.plot = FALSE)
ediff.fonly.gonadvlate <- scde.expression.difference(o.ifm.fonly, cd_fonly, o.prior.fonly, groups = as.factor(clust_factors_fonly_gonadvlate), n.randomizations = 100, n.cores = 8, verbose = 1)
ediff.fonly.gonadvlate[,1:6] = -ediff.fonly.gonadvlate[,1:6]
# estimate MLE TPM in early and late gonadal groups
jp.xx.0.gonadvlate <- scde.posteriors(models = o.ifm.fonly[names(clust_factors_fonly_gonadvlate[which(clust_factors_fonly_gonadvlate == 0)]), ], cd_fonly, o.prior.fonly, n.cores = 8)
jp.xx.0.mles.gonadvlate = as.numeric(colnames(jp.xx.0.gonadvlate)[apply(jp.xx.0.gonadvlate, 1, which.max)])
names(jp.xx.0.mles.gonadvlate) = rownames(jp.xx.0.gonadvlate)
jp.xx.1.gonadvlate <- scde.posteriors(models = o.ifm.fonly[names(clust_factors_fonly_gonadvlate[which(clust_factors_fonly_gonadvlate == 1)]), ], cd_fonly, o.prior.fonly, n.cores = 8)
jp.xx.1.mles.gonadvlate = as.numeric(colnames(jp.xx.1.gonadvlate)[apply(jp.xx.1.gonadvlate, 1, which.max)])
names(jp.xx.1.mles.gonadvlate) = rownames(jp.xx.1.gonadvlate)
ediff.fonly.gonadvlate_protcod = ediff.fonly.gonadvlate[rownames(ediff.fonly.gonadvlate) %in% embl,]
ediff.fonly.gonadvlate_protcod$pval = 2*(pnorm(-abs(ediff.fonly.gonadvlate_protcod$Z)))
ediff.fonly.gonadvlate_protcod$p.adj = p.adjust(ediff.fonly.gonadvlate_protcod$pval,method="BH")
```
calculate germ cell specificity from SCDE posteriors of germ cells vs somatic cells
```{r}
gc_sc_mf_labels = subset([email protected],res.1.2 %in% c(1,2,5,3,4,7,10,13,11,6))[,c("res.1.2")]
names(gc_sc_mf_labels) = rownames(subset([email protected],res.1.2 %in% c(1,2,5,3,4,7,10,13,11,6)))
# divide cells into male germ cells, male somatic cells, female somatic cells, and female germ cells on the basis of assigned clusters
gc_sc_mf_labels[which((gc_sc_mf_labels %in% c(1,6,11))&(names(gc_sc_mf_labels) %in% colnames(rawdat_tpm_filt)[which(rawdat_tpm_filt["WT1",] == 0)]))] = "GCm"
gc_sc_mf_labels[which((gc_sc_mf_labels %in% c(4))&(names(gc_sc_mf_labels) %in% colnames(rawdat_tpm_filt)[which(rawdat_tpm_filt["WT1",] > 0)]))] = "SCm"
gc_sc_mf_labels[which((gc_sc_mf_labels %in% c(2,5,7,10,13))&(names(gc_sc_mf_labels) %in% colnames(rawdat_tpm_filt)[which(rawdat_tpm_filt["WT1",] == 0)]))] = "GCf"
gc_sc_mf_labels[which((gc_sc_mf_labels %in% c(3))&(names(gc_sc_mf_labels) %in% colnames(rawdat_tpm_filt)[which(rawdat_tpm_filt["WT1",] > 0)]))] = "SCf"
gc_sc_mf_labels = gc_sc_mf_labels[which(gc_sc_mf_labels %in% c("GCm","SCm","GCf","SCf"))]
# run SCDE considering all 4 clusters simultaneously
rawdat_gcsc = [email protected][,names(gc_sc_mf_labels)]
rawdat_gcsc<-apply(rawdat_gcsc,2,function(x) {storage.mode(x) <- 'integer'; x})
cd_gcsc = clean.counts(rawdat_gcsc,min.lib.size=1000, min.reads = 1, min.detected = 1)
o.ifm.gcsc <- scde.error.models(counts = cd_gcsc, groups = gc_sc_mf_labels, n.cores = 8, threshold.segmentation = TRUE, save.crossfit.plots = FALSE, save.model.plots = FALSE, verbose = 0,min.count.threshold = 1)
valid.cells <- o.ifm.gcsc$corr.a > 0
#extract posterior expression MLEs for each group
o.prior.gcsc <- scde.expression.prior(models = o.ifm.gcsc, counts = cd_gcsc, length.out = 400, show.plot = FALSE)
gcm_p <- scde.posteriors(models = o.ifm.gcsc[names(gc_sc_mf_labels[which(gc_sc_mf_labels == "GCm")]), ], cd_gcsc, o.prior.gcsc, n.cores = 8)
scm_p <- scde.posteriors(models = o.ifm.gcsc[names(gc_sc_mf_labels[which(gc_sc_mf_labels == "SCm")]), ], cd_gcsc, o.prior.gcsc, n.cores = 8)
gcf_p <- scde.posteriors(models = o.ifm.gcsc[names(gc_sc_mf_labels[which(gc_sc_mf_labels == "GCf")]), ], cd_gcsc, o.prior.gcsc, n.cores = 8)
scf_p <- scde.posteriors(models = o.ifm.gcsc[names(gc_sc_mf_labels[which(gc_sc_mf_labels == "SCf")]), ], cd_gcsc, o.prior.gcsc, n.cores = 8)
gcm_p.mles = as.numeric(colnames(gcm_p)[apply(gcm_p, 1, which.max)])
names(gcm_p.mles) = rownames(gcm_p)
scm_p.mles = as.numeric(colnames(scm_p)[apply(scm_p, 1, which.max)])
names(scm_p.mles) = rownames(scm_p)
gcf_p.mles = as.numeric(colnames(gcf_p)[apply(gcf_p, 1, which.max)])
names(gcf_p.mles) = rownames(gcf_p)
scf_p.mles = as.numeric(colnames(scf_p)[apply(scf_p, 1, which.max)])
names(scf_p.mles) = rownames(scf_p)
# calculate germ cell specificity as the expression MLE in germ cells divided by the summed expression MLE in germ and somatic cells
gc_male_spec_mle = gcm_p.mles/(gcm_p.mles+scm_p.mles)
gc_female_spec_mle = gcf_p.mles/(gcf_p.mles+scf_p.mles)
```