forked from irinagain/SING
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy paths8_estimation_sim_setting_3.R
153 lines (110 loc) · 5.23 KB
/
s8_estimation_sim_setting_3.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# Simulation Setting 3 (True components are sparse)
# Identification of the number of components + methods application
# Load the simulated data as well as true components
load("SimulatedLargeScaleindiv10_Sparse.Rdata")
# Source functions for estimation
source("jngcaFunctions.R")
source("mCCAjointICA.R")
# Center X and Y
n = nrow(dX)
pX = ncol(dX)
pY = ncol(dY)
dXcentered <- dX - matrix(rowMeans(dX), n, pX, byrow = F)
dYcentered <- dY - matrix(rowMeans(dY), n, pY, byrow = F)
## Apply Joint ICA
#############################################################
out_jointICA <- jointICA(dXcentered, dYcentered, r0 = 2)
# Prepare the output list for joint ICA
save(out_jointICA, file = "jointICA_LargeScaleindiv10_Sparse.Rda")
# Joint rank estimation based on oversaturated models
#####################################################
estX_JB = mlcaFP(xData = t(dX), n.comp = 48, whiten = 'sqrtprec', restarts.pbyd = 20, distribution='JB')
Mx_JB = est.M.ols(sData = estX_JB$S, xData = t(dX))
estY_JB = mlcaFP(xData = t(dY), n.comp = 48, whiten = 'sqrtprec', restarts.pbyd = 20, distribution='JB')
# Get joint components out
My_JB = est.M.ols(sData = estY_JB$S, xData = t(dY))
alpha = 0.05
nperms = 1000
matchedResults = angleMatchICA(t(Mx_JB), t(My_JB))
permuteJoint = permmatRank_joint(matchedResults, nperms = nperms)
joint_rank = min(which(permuteJoint$pvalues > alpha)) - 1
pval_joint = permuteJoint$pvalues
joint_rank # selects rank 2
## Apply separate JB
#############################################################
# JB on X
estX_JB = mlcaFP(xData = t(dX), n.comp = 12, whiten = 'sqrtprec', restarts.pbyd = 20, distribution='JB')
Uxfull <- estX_JB$Ws
# Match mixing matrices to get correct ordering, then can get starting points
Mx_JB = est.M.ols(sData = estX_JB$S, xData = t(dX))
errorMx = frobICA(M1 = Mx_JB, M2 = t(mj), standardize = T)*sqrt(ncol(Mx_JB))
errorMx # 0.225
errorSx = frobICA(S1 = t(Sx[1:2, ]), S2 = estX_JB$S, standardize = T)
errorSx # 0.029
# JB on Y
estY_JB = mlcaFP(xData = t(dY), n.comp = 12, whiten = 'sqrtprec', restarts.pbyd = 20, distribution='JB')
Uyfull <- estY_JB$Ws
# Get joint components out
My_JB = est.M.ols(sData = estY_JB$S, xData = t(dY))
errorMy = frobICA(M1 = My_JB, M2 = t(mj), standardize = T)*sqrt(ncol(My_JB))
errorMy # 0.168
errorSy = frobICA(S1 = t(Sy[1:2, ]), S2 = estY_JB$S, standardize = T)
errorSy # 0.015
# Prepare the output list for separate JB
output_sepJB <- list(errorSx = errorSx, errorSy = errorSy, errorMx = errorMx, errorMy = errorMy, estX_JB = estX_JB, estY_JB = estY_JB, mj = mj)
save(output_sepJB, file = "sepJB_LargeScale_Sparse.Rda")
# Joint rank estimation based on separate models fitted with true number of components
#####################################################
# Rank estimation via permutation
alpha = 0.05
nperms = 1000
matchedResults = angleMatchICA(t(Mx_JB), t(My_JB))
permuteJoint = permmatRank_joint(matchedResults, nperms = nperms)
joint_rank = min(which(permuteJoint$pvalues > alpha)) - 1
pval_joint = permuteJoint$pvalues
joint_rank # selects rank 2
# Whiten dX and dY
###################################
# Scale rowwise
est.sigmaXA = tcrossprod(dXcentered)/(pX-1) ## since already centered, can just take tcrossprod
whitenerXA = est.sigmaXA%^%(-0.5)
invLx = est.sigmaXA%^%(0.5)
xDataA = whitenerXA %*% dXcentered
# For Y
# Scale rowwise
est.sigmaYA = tcrossprod(dYcentered)/(pY-1) ## since already centered, can just take tcrossprod
whitenerYA = est.sigmaYA%^%(-0.5)
yDataA = whitenerYA %*% dYcentered
invLy = est.sigmaYA%^%(0.5)
# Starting points for the algorithm are Us
###########################################
matchMxMy = greedymatch(t(Mx_JB), t(My_JB), Ux = t(Uxfull), Uy = t(Uyfull))
cor(matchMxMy$Mx[, 1:2], matchMxMy$My[, 1:2]) # 0.97 and 0.95
# Calculate JB values
JBall = calculateJB(matchMxMy$Ux[1:2, ], X = xDataA) + calculateJB(matchMxMy$Uy[1:2, ], X = yDataA)
# SING on shared + individual
###################################
# Libraries needed to run C functions
library(Rcpp)
library(RcppArmadillo)
sourceCpp("CfunctionsCurvilinear.cpp")
# JB and tolerance parameters
alpha = 0.8
tol = 1e-10
# small rho
rho = JBall/10
out_indiv_small <- updateUboth_c(invLx = invLx, invLy = invLy, xData = xDataA, yData = yDataA, Ux = matchMxMy$Ux, Uy = matchMxMy$Uy, rho = rho, tol = tol, alpha = alpha, maxiter = 1500, r0 = 2)
save(out_indiv_small, file = "out_indiv_small_Sparse.Rda")
# medium rho
rho = JBall
out_indiv_medium <- updateUboth_c(invLx = invLx, invLy = invLy, xData = xDataA, yData = yDataA, Ux = out_indiv_small$Ux, Uy = out_indiv_small$Uy, rho = rho, tol = tol, alpha = alpha, maxiter = 1500, r0 = 2)
save(out_indiv_medium, file = "out_indiv_medium_Sparse.Rda")
# large rho
rho = JBall * 10
out_indiv_large <- updateUboth_c(invLx = invLx, invLy = invLy, xData = xDataA, yData = yDataA, Ux = out_indiv_medium$Ux, Uy = out_indiv_medium$Uy, rho = rho, tol = tol, alpha = alpha, maxiter = 1500, r0 = 2)
save(out_indiv_large, file = "out_indiv_large_Sparse.Rda")
###############################################################################
# Apply mCCA + Joint ICA, and calculate the errors
###############################################################################
out_mcca <- mCCA_jointICA(dXcentered, dYcentered, Mx = 12, My = 12, M = 2)
save(out_mcca, file = "mCCA_LargeScale_Sparse.Rda")