-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathKey_ASSIGN_functions.R
executable file
·143 lines (130 loc) · 5.86 KB
/
Key_ASSIGN_functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
library(devtools)
# Gather every prediction file matching baseDir/*/*/pathway_activity_testset*
# into one wide data frame.
#
# Args:
#   baseDir: directory that contains the <x>/<y>/pathway_activity_testset*
#            result files.
#
# Returns:
#   A data.frame with the columns of all matched files bound side by side.
#   Each column is renamed "<relative file path>/<original column name>" so
#   its origin stays visible. Returns NULL when no file matches.
#
# Notes:
#   * Side effect: the working directory is changed to baseDir and left
#     there. This is kept as-is because existing scripts may rely on it.
#   * Files are combined with cbind(), so they must have the same number of
#     rows; rows are not re-aligned by name.
gatherFile <- function(baseDir) {
  setwd(baseDir)
  # Sys.glob() expands the pattern inside R, so no external `ls` is needed.
  filenames <- Sys.glob("*/*/pathway_activity_testset*")
  if (length(filenames) == 0L) {
    # Nothing to read; the old 1:length() loop crashed on read.csv(NA) here.
    return(NULL)
  }
  # Read the files one at a time, tagging every column with its source file.
  tables <- lapply(filenames, function(path) {
    tbl <- read.csv(path, header = TRUE, row.names = 1)
    colnames(tbl) <- paste(path, colnames(tbl), sep = "/")
    tbl
  })
  # One cbind over the whole list instead of growing the frame in a loop.
  do.call(cbind, tables)
}
# Classify values as sensitive ("S") or resistant ("R") against a cutoff.
#
# Args:
#   value:  numeric value, or a vector of values, to classify.
#   cutoff: threshold; values strictly greater than it are "S", all others
#           (including values equal to the cutoff) are "R".
#
# Returns:
#   A character vector the same length as the comparison `value > cutoff`,
#   holding "S" or "R". A missing comparison yields NA_character_.
#   Scalar input gives the same single "S"/"R" string as before.
resistant_or_sensitive <- function(value, cutoff) {
  # as.vector() drops names so the result is always a plain character vector.
  above <- as.vector(value > cutoff)
  status <- rep(NA_character_, length(above))
  # which() skips NA comparisons, leaving those positions as NA.
  status[which(above)] <- "S"
  status[which(!above)] <- "R"
  status
}
# Replace abbreviated identifiers with their full-length counterparts.
#
# Each entry of `shortnames` is looked up in `longnames` with pmatch(): an
# exact match wins, otherwise a unique partial (prefix) match is used.
# Entries with no match, or an ambiguous one, are left unchanged.
#
# Args:
#   longnames:  character vector of full identifiers.
#   shortnames: character vector of (possibly truncated) identifiers.
#
# Returns:
#   `shortnames` with every matched entry replaced by the matching element of
#   `longnames`. Prints how many entries were replaced.
short_to_long_TCGA_id <- function(longnames = NULL, shortnames = NULL) {
  if (length(shortnames) == 0L) {
    # Nothing to translate; the old 1:length() loop errored on empty input.
    print(paste(0, "names have been changed", sep = " "))
    return(shortnames)
  }
  # duplicates.ok = TRUE lets several short names resolve to the same long
  # name, which matches the original one-lookup-per-element behaviour.
  hits <- pmatch(shortnames, longnames, duplicates.ok = TRUE)
  matched <- !is.na(hits)
  shortnames[matched] <- longnames[hits[matched]]
  print(paste(sum(matched), "names have been changed", sep = " "))
  return(shortnames)
}
# Merge two data frames and move the merge key back into the row names.
#
# Args:
#   x, y: data frames to merge.
#   by:   passed to merge(); the default 0 merges on row names, in which case
#         merge() returns the key as a leading "Row.names" column.
#   ...:  further arguments forwarded to merge() (e.g. all, all.x).
#
# Returns:
#   The merged data frame without its first (key) column. With by = 0 the
#   row names are the merged keys. With any other `by` there is no
#   "Row.names" column, so row names are reset to the default sequence and
#   the first merged column is still dropped (original behaviour, kept).
#   The result is always a data.frame, even when only one column remains.
merge_drop <- function(x, y, by = 0, ...) {
  new_m <- merge(x, y, by = by, ...)
  rownames(new_m) <- new_m$Row.names
  # drop = FALSE keeps a data.frame (and its row names) for a single column.
  new_m[, -1, drop = FALSE]
}
# Run a PCA on the columns of `mat` and plot PC1 vs PC2, marking consecutive
# groups of columns with different colours and symbols.
#
# Args:
#   mat:    matrix or data frame; it is transposed before prcomp(), so its
#           columns are the observations that get plotted.
#   sub:    group sizes in column order, e.g. c(3, 5) means the first 3
#           columns form group 1 and the next 5 form group 2. The sizes must
#           sum to ncol(mat).
#   center: passed to prcomp() as `center`.
#   scale:  passed to prcomp() as `scale.`.
#
# Returns:
#   Invisibly, the prcomp fit. If the group sizes do not add up to the number
#   of columns, a message is printed, nothing is plotted and NULL is returned
#   invisibly.
#
# Side effects:
#   Draws the scree plot (plot.prcomp) and then the PC1/PC2 scatter on the
#   active graphics device.
pcaplot <- function(mat, sub, center = TRUE, scale = TRUE) {
  # ncol() is correct for both matrices and data frames; length() on a
  # matrix is nrow * ncol and made this check fail for matrix input.
  if (sum(sub) != ncol(mat)) {
    print("verify the subscripts...exiting now")
    return(invisible(NULL))
  }
  pca_mat <- prcomp(t(mat), center = center, scale. = scale)
  plot(pca_mat)
  plot(pca_mat$x[, 1], pca_mat$x[, 2])
  abline(h = 0, v = 0)
  group_end <- cumsum(sub)
  group_start <- group_end - sub + 1
  # Groups are drawn last-to-first, as before.
  for (i in rev(seq_along(sub))) {
    if (sub[i] < 1) {
      next
    }
    # Each group covers exactly its own points; the previous range began one
    # position early and re-drew the last point of the preceding group.
    idx <- group_start[i]:group_end[i]
    points(pca_mat$x[idx, 1], pca_mat$x[idx, 2], col = i, pch = i)
  }
  invisible(pca_mat)
}
# Run assign.wrapper() twice on the same data: once with an adaptive
# background only (adaptive_B), once with adaptive background and adaptive
# signature (adaptive_B + adaptive_S). Each run is seeded with
# set.seed(1234) and writes into its own sub-folder of `out_dir_base`.
#
# assign.wrapper() is not defined in this file (presumably it comes from the
# ASSIGN package; confirm that it is loaded before calling).
#
# Args:
#   trainingData, testData: forwarded to assign.wrapper(). The defaults read
#                    the variables `train` and `test` from the calling
#                    environment chain, so pass them explicitly when possible.
#   trainingLabel1:  forwarded as `trainingLabel`.
#   g:               forwarded as `n_sigGene`.
#   out_dir_base:    existing directory in which the output folders are made.
#   cov:             only 0 is supported.
#   geneList:        forwarded as `geneList` (NULL is passed through as-is).
#   single:          0 names the folders "..._multi_...", 1 "..._single_...".
#   sigma_sZero, sigma_sNonZero, iter, burn_in, S_zeroPrior:
#                    forwarded unchanged; the two sigma values are also part
#                    of the output folder names.
#
# Returns:
#   The value of the second (adaptive_B + adaptive_S) assign.wrapper() call.
#
# Side effects:
#   Creates "<out_dir_base>/adapB_<mode>_<sigma_sZero>_<sigma_sNonZero>" and
#   "<out_dir_base>/adap_adap_<mode>_<sigma_sZero>_<sigma_sNonZero>", and
#   resets the random seed.
assign_easy_multi <- function(trainingData = train, testData = test, trainingLabel1 = NULL, g = 100, out_dir_base = "~/Desktop/tmp", cov = 0, geneList = NULL, single = 0, sigma_sZero = 0.01, sigma_sNonZero = 1, iter = 100000, burn_in = 50000, S_zeroPrior = TRUE) {
  # Fail fast: other combinations never defined the output folders and only
  # surfaced later as an "object not found" error.
  if (cov != 0 || !(single %in% c(0, 1))) {
    stop("assign_easy_multi supports only cov = 0 with single = 0 or 1",
         call. = FALSE)
  }
  mode_tag <- if (single == 0) "multi" else "single"
  adapB_folder <- paste(
    out_dir_base,
    paste(paste0("adapB_", mode_tag), sigma_sZero, sigma_sNonZero, sep = "_"),
    sep = "/"
  )
  adap_adap_folder <- paste(
    out_dir_base,
    paste(paste0("adap_adap_", mode_tag), sigma_sZero, sigma_sNonZero, sep = "_"),
    sep = "/"
  )
  dir.create(adapB_folder)
  dir.create(adap_adap_folder)

  # One seeded run; only adaptive_S and the output folder differ between the
  # two runs. A NULL geneList is simply passed through.
  run_assign <- function(adaptive_S, outputDir) {
    set.seed(1234)
    assign.wrapper(
      trainingData = trainingData, testData = testData,
      trainingLabel = trainingLabel1, geneList = geneList, n_sigGene = g,
      adaptive_B = TRUE, adaptive_S = adaptive_S, mixture_beta = FALSE,
      S_zeroPrior = S_zeroPrior, outputDir = outputDir,
      sigma_sZero = sigma_sZero, sigma_sNonZero = sigma_sNonZero,
      iter = iter, burn_in = burn_in
    )
  }
  run_assign(FALSE, adapB_folder)
  run_assign(TRUE, adap_adap_folder)
}
# Run assign_easy_multi() for a single signature.
#
# The signature's short name is the part of `sigProtein` before the first
# "_". It is used to name the gene list and the training-label entries.
#
# Args:
#   sigProtein:     signature identifier, e.g. "<name>_<suffix>".
#   numGenes:       forwarded as `g`; when not NA it is also part of the
#                   output folder name.
#   geneList:       optional gene list; when supplied, its names are set to
#                   the signature's short name. NULL is passed through.
#   trainingData, testData: forwarded unchanged.
#   trainingLabels: c(n_control, n_signature). Indices 1..n_control are
#                   labelled as control and the following n_signature indices
#                   as the signature (presumably column indices into
#                   trainingData; confirm against assign.wrapper).
#   sigma_sZero, sigma_sNonZero, S_zeroPrior: forwarded unchanged.
#
# Returns:
#   Whatever assign_easy_multi() returns.
#
# Side effects:
#   Reads the variable `basedir` from the calling environment chain and
#   creates "<basedir>/<sigProtein>[_<numGenes>]_gene_list" as output folder.
testSig <- function(sigProtein, numGenes = NA, geneList = NULL, trainingData, testData, trainingLabels, sigma_sZero = 0.01, sigma_sNonZero = 1, S_zeroPrior = TRUE) {
  if (!exists("basedir")) {
    stop("testSig needs a variable `basedir` holding the output directory",
         call. = FALSE)
  }
  sig_name <- strsplit(sigProtein, "_")[[1]][1]
  # Naming NULL is an error in R, so only name a gene list that was supplied;
  # previously the default geneList = NULL always failed here.
  if (!is.null(geneList)) {
    names(geneList) <- sig_name
  }
  names(sigProtein) <- sig_name

  n_control <- trainingLabels[1]
  n_signature <- trainingLabels[2]
  trainingLabel <- list(
    control = list(sigProtein = 1:n_control),
    sigProtein = (n_control + 1):(n_control + n_signature)
  )
  names(trainingLabel$control) <- names(sigProtein)
  names(trainingLabel)[2] <- names(sigProtein)

  folder_name <- if (is.na(numGenes)) {
    paste(sigProtein, "gene_list", sep = "_")
  } else {
    paste(sigProtein, numGenes, "gene_list", sep = "_")
  }
  sub_dir <- paste(basedir, folder_name, sep = "/")
  dir.create(sub_dir)

  assign_easy_multi(
    trainingData = trainingData, testData = testData,
    trainingLabel1 = trainingLabel, g = numGenes, geneList = geneList,
    out_dir_base = sub_dir, single = 1,
    sigma_sZero = sigma_sZero, sigma_sNonZero = sigma_sNonZero,
    S_zeroPrior = S_zeroPrior
  )
}
# Load a saved result file and return its signature matrix.
#
# Args:
#   rDataPath: path to an .RData file that, when loaded, provides an object
#              named `output.data`.
#
# Returns:
#   output.data$processed.data$S_matrix (the signature matrix with
#   coefficients). Note that, despite the function name, the gene list
#   (output.data$processed.data$diffGeneList) is NOT what gets returned.
getGeneList <- function(rDataPath) {
  # load() restores the saved objects into this function's environment.
  load(rDataPath)
  processed <- output.data$processed.data
  return(processed$S_matrix)
}
# Write a table-like object to a tab-separated text file.
#
# Args:
#   variable: object to write (anything write.table() accepts).
#   filename: destination file path or connection.
#
# The file is written without quoting. col.names = NA makes write.table()
# emit a blank header cell above the row-name column, so the header lines up
# with the data columns.
writeFile <- function(variable, filename) {
  write.table(
    x = variable,
    file = filename,
    quote = FALSE,
    sep = "\t",
    col.names = NA
  )
}
# Run assign_easy_multi() for a combined (multi-pathway) signature.
#
# Args:
#   sigProteins:   label used to name the output folder.
#   numGenes:      forwarded as `g`.
#   geneList:      forwarded unchanged.
#   trainingData, testData: forwarded unchanged.
#   trainingLabel: forwarded as `trainingLabel1`.
#
# Returns:
#   Whatever assign_easy_multi() returns.
#
# Side effects:
#   Reads the variable `basedir` from the calling environment chain and
#   creates "<basedir>/<sigProteins>_gene_list" as the output folder.
testSig_multi <- function(sigProteins, numGenes = NA, geneList = NULL, trainingData, testData, trainingLabel) {
  folder_name <- paste0(sigProteins, "_", "gene_list")
  sub_dir <- paste0(basedir, "/", folder_name)
  dir.create(sub_dir)
  assign_easy_multi(
    trainingData = trainingData,
    testData = testData,
    trainingLabel1 = trainingLabel,
    g = numGenes,
    geneList = geneList,
    out_dir_base = sub_dir
  )
}