forked from kessenbrocklab/Nguyen_Pervolarakis_Nat_Comm_2018
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGeneScore.R
151 lines (139 loc) · 5.38 KB
/
GeneScore.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
GeneScore<-function(matrix,idents,genes,plot.title){
set.seed(1)
#Matrix
ind1_trimmed_log<-matrix
print(dim(ind1_trimmed_log))
#Cell IDs
cell.id.trimmed<-idents
gene_agg<-rowSums(ind1_trimmed_log)
head(gene_agg)
gene_agg_sort<-sort(gene_agg)
range(gene_agg_sort) #0.7280198 2152.8409068
gene_agg_sort_binned<-cut(gene_agg_sort,25,labels=FALSE)
#gene_agg_sort_binned
#hist(gene_agg_sort_binned)
Bin1_genes<-which(gene_agg_sort_binned == "1")
Bin2_genes<-which(gene_agg_sort_binned == "2")
Bin3_genes<-which(gene_agg_sort_binned == "3")
Bin4_genes<-which(gene_agg_sort_binned == "4")
Bin5_genes<-which(gene_agg_sort_binned == "5")
Bin6_genes<-which(gene_agg_sort_binned == "6")
Bin7_genes<-which(gene_agg_sort_binned == "7")
Bin8_genes<-which(gene_agg_sort_binned == "8")
Bin9_genes<-which(gene_agg_sort_binned == "9")
Bin10_genes<-which(gene_agg_sort_binned == "10")
Bin11_genes<-which(gene_agg_sort_binned == "11")
Bin12_genes<-which(gene_agg_sort_binned == "12")
Bin13_genes<-which(gene_agg_sort_binned == "13")
Bin14_genes<-which(gene_agg_sort_binned == "14")
Bin15_genes<-which(gene_agg_sort_binned == "15")
Bin16_genes<-which(gene_agg_sort_binned == "16")
Bin17_genes<-which(gene_agg_sort_binned == "17")
Bin18_genes<-which(gene_agg_sort_binned == "18")
Bin19_genes<-which(gene_agg_sort_binned == "19")
Bin20_genes<-which(gene_agg_sort_binned == "20")
Bin21_genes<-which(gene_agg_sort_binned == "21")
Bin22_genes<-which(gene_agg_sort_binned == "22")
Bin23_genes<-which(gene_agg_sort_binned == "23")
Bin24_genes<-which(gene_agg_sort_binned == "24")
Bin25_genes<-which(gene_agg_sort_binned == "25")
#head(gene_agg_sort)
tmp<-rbind(gene_agg_sort,rep(0,length(gene_agg_sort)))
tmp<-as.data.frame(tmp)
tmp[2,Bin1_genes]<-"Bin1"
tmp[2,Bin2_genes]<-"Bin2"
tmp[2,Bin3_genes]<-"Bin3"
tmp[2,Bin4_genes]<-"Bin4"
tmp[2,Bin5_genes]<-"Bin5"
tmp[2,Bin6_genes]<-"Bin6"
tmp[2,Bin7_genes]<-"Bin7"
tmp[2,Bin8_genes]<-"Bin8"
tmp[2,Bin9_genes]<-"Bin9"
tmp[2,Bin10_genes]<-"Bin10"
tmp[2,Bin11_genes]<-"Bin11"
tmp[2,Bin12_genes]<-"Bin12"
tmp[2,Bin13_genes]<-"Bin13"
tmp[2,Bin14_genes]<-"Bin14"
tmp[2,Bin15_genes]<-"Bin15"
tmp[2,Bin16_genes]<-"Bin16"
tmp[2,Bin17_genes]<-"Bin17"
tmp[2,Bin18_genes]<-"Bin18"
tmp[2,Bin19_genes]<-"Bin19"
tmp[2,Bin20_genes]<-"Bin20"
tmp[2,Bin21_genes]<-"Bin21"
tmp[2,Bin22_genes]<-"Bin22"
tmp[2,Bin23_genes]<-"Bin23"
tmp[2,Bin24_genes]<-"Bin24"
tmp[2,Bin25_genes]<-"Bin25"
of_interest_genes<-genes
#Run Scores####
#of_interest_genes
of_interest_bin_ID<-colnames(tmp) %in% of_interest_genes[,1] #Identify colnames of genes in tmp
of_interest_bin_ID<-grep("TRUE",of_interest_bin_ID) #Identify column numbers of genes in tmp
of_interest_binned<-tmp[,of_interest_bin_ID] #Trim binned matrix to only cell cycle genes
of_interest_sampling<-(of_interest_binned[2,]) #Determine bins that need to be sampled from
#Count number of genes in each bin
counts<-list()
for(i in 1:25)
{
counts[[i]]<-length(grep(paste("Bin",i,"$",sep=''),of_interest_sampling[1,]))
}
counts<-unlist(counts)
#Loop through and make a list of each of the genes for each sample, with each index being the list of genes for sample
gene_samples<-list()
k=1
bin_names<-list()
for(i in 1:25)
{
bin_names[i]<-unlist(noquote(paste("Bin",i,"_genes",sep='')))
}
for(i in 1:25)
{
if(counts[i]>0)
{
for(j in 1:counts[i])
{
if (length(get(bin_names[[i]]))>100)
{
gene_samples[[k]]<-sample(colnames(tmp)[get(bin_names[[i]])], 100)
#print(paste("Bin",i,"_genes",sep=''))
}
else
{
gene_samples[[k]]<-sample(colnames(tmp)[get(bin_names[[i]])], 100,replace=TRUE)
}
k = k + 1
}
}
else
{}
}
#Loop through length of cell cycle sampling and create sample of genes from its corresponding bin
#of_interest_sampling
of_interest_inds<-match(colnames(of_interest_sampling),rownames(ind1_trimmed_log))
of_interest_matrix<-ind1_trimmed_log[of_interest_inds,]
of_interest_gene_scores<-colMeans(of_interest_matrix)
#Loop through all cell cycle sample numbers and rbind together forever
of_interest_control_matrix<-
ind1_trimmed_log[gene_samples[[1]],]
#rownames(of_interest_control_matrix)<-1:100
for(i in 2:length(gene_samples))
{
of_interest_control_matrix<-rbind(of_interest_control_matrix,ind1_trimmed_log[gene_samples[[i]],],make.row.names=TRUE)
}
of_interest_control_scores<-colMeans(of_interest_control_matrix)
of_interest_scores<-of_interest_gene_scores-of_interest_control_scores
of_interest_score_matrix<-as.matrix(of_interest_scores)
trimmed_cells_clusterID<-cell.id.trimmed[,1] %in% rownames(of_interest_score_matrix)
trimmed_cells_clusterID_IDs<-grep("TRUE",trimmed_cells_clusterID) #Identify column numbers of genes in tmp
cell_id_trimmed_trimmed<-cell.id.trimmed[trimmed_cells_clusterID_IDs,]
of_interest_score_plotting_matrix<-cbind.data.frame(of_interest_score_matrix, cell_id_trimmed_trimmed[,2])
colnames(of_interest_score_plotting_matrix)<-c("Score","ClusterID")
#Plotting Scores CHANGE TITLE FOR DIFFERENT GRAPHS####
q<-ggplot(of_interest_score_plotting_matrix,aes(x=ClusterID, y=Score,color=ClusterID))
q+
geom_violin(aes(fill=ClusterID))+
geom_jitter(height=0,color="black",size=0.1)+
ggtitle(plot.title)+
theme_bw()
}