Skip to content

Commit

Permalink
bug fixes in prep to run
Browse files Browse the repository at this point in the history
  • Loading branch information
dylansutterlin committed Mar 15, 2024
1 parent 6ad1e66 commit 500d863
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 66 deletions.
18 changes: 12 additions & 6 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from scripts import main_con as m
import os

import pickle

p = r"/data/rainville/HYPNOSIS_ASL_ANALYSIS/CBF_normalized"
conf_dir = (
Expand All @@ -10,25 +10,31 @@
#p = r'/home/dsutterlin/projects/test_data/ASL_RS_hypnosis/CBF_4D_normalized'
#conf_dir = False
#pwd_main = r"/home/dsutterlin/projects/resting_state_hypnosis/resting_state_hypnosis"

'''
data, fcdict = m.con_matrix(
p,
pwd_main=pwd_main,
conf_dir=conf_dir,
save_base= os.path.join(pwd_main, 'debug'),
save_folder="difumo64_correl_negw",
save_folder="difumo64_correl_noProc",
atlas_name="difumo64",
sphere_coord = [(54, -28, 26)],
connectivity_measure="correlation",
n_sub = 3,
n_sub = 10,
verbose=True,
remove_ROI_maps = [8,14,43], # based on masker report, outside brain or no interest
remove_subjects = ['APM_07_H1', 'APM_11_H1', 'APM_22_H2'] # based on rainville et al., 2019 and .xlsx file
)
graphs = m.connectome_analyses(data, fcdict, n_iter = 100)
graphs = m.connectome_analyses(data, fcdict, bootstrap = 10)
'''
with open(os.path.join(pwd_main,'debug', 'difumo64_correl_noProc', 'data.pkl'),'rb') as f:
data = pickle.load(f)

with open(os.path.join(pwd_main,'debug', 'difumo64_correl_noProc', 'graphs_dict.pkl'),'rb') as f:
graphs = pickle.load(f)

cv_results = m.prediction_analyses(data, graphs, save_to, n_permut = 100, verbose = False)
cv_results = m.prediction_analyses(data, graphs, n_permut = 10, verbose = False)

'''
import matplotlib.pyplot as plt
Expand Down
37 changes: 19 additions & 18 deletions scripts/main_con.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import copy
import glob
import numpy as np
import pandas as pd
import nibabel as nib
from scipy.stats import pearsonr
from nilearn.maskers import (
Expand Down Expand Up @@ -221,12 +222,15 @@ def con_matrix(
save_to_plot = os.path.join(save_base, save_folder, 'plots')
if os.path.exists(save_to_plot) is False:
os.mkdir(save_to_plot)

if os.path.exists(os.path.join(save_to, 'reports')) is False:
os.mkdir(os.path.join(save_to, 'reports'))
voxel_masker.generate_report().save_as_html(os.path.join(save_to, 'reports', 'voxelMasker_report.html'))
masker.generate_report(displayed_maps='all').save_as_html(os.path.join(save_to,'reports','mapsMasker_report.html'))

data.atlas_labels = atlas_labels
data.save_to = save_to

with open(os.path.join(save_to, "dict_connectomes.pkl"), "wb") as f:
pickle.dump(fcdict, f)
with open(os.path.join(save_to, 'data.pkl'), 'wb') as f:
Expand All @@ -239,13 +243,13 @@ def con_matrix(
if os.path.exists(os.path.join(save_to,'NBS_txtData')) is False:
os.mkdir(os.path.join(save_to,'NBS_txtData'))
func.export_txt_NBS(os.path.join(save_to,'NBS_txtData'), atlas, atlas_labels, fcdict['pre_connectomes'],fcdict["post_connectomes"], fcdict["diff_weight_connectomes"], data.subjects)

print('---PREPARING AND SAVING PLOTS---')
# Plotting connectomes and weights distribution
for cond, matrix_list in zip(fcdict['conditions'],[
fcdict["pre_connectomes"],fcdict['post_connectomes'], fcdict["diff_weight_connectomes"]]):
plot_func.dist_mean_edges(cond, matrix_list, save_to_plot)

# Replication of Rainville et al., 2019 automaticity ~ rCBF in parietal Operculum(supramarg. gyrus in difumo64)
Y = data.phenotype
vd = 'Abs_diff_automaticity'
Expand All @@ -258,14 +262,11 @@ def con_matrix(
plot_func.visu_correl(auto, tvalues, save_to_plot, vd_name = vd, vi_name = 'T test change in PO', title = 'Automaticity score vs mean ttest')
print('---DONE with connectivity matrices and plots---')

data.atlas_labels = atlas_labels
data.save_to = save_to

return data, fcdict



def connectome_analyses(data, fcdict, bootstrap = 10 ):
def connectome_analyses(data, fcdict, bootstrap = 1000 ):

subjects = data.subjects
atlas_labels = data.atlas_labels
Expand Down Expand Up @@ -330,7 +331,7 @@ def connectome_analyses(data, fcdict, bootstrap = 10 ):
return graphs


def prediction_analyses(data, graphs, n_permut = 5000, test_size = 0.20, pca = '90%', verbose = False):
def prediction_analyses(data, graphs, n_permut = 5000, test_size = 0.20, pca = 0.90, verbose = False):
# Models to test
# - Yi ~ Strenghts, Clustering, EigenCentrality, LocalEfficiency
# Yi ~ PO multivariate node metrics
Expand All @@ -343,39 +344,39 @@ def prediction_analyses(data, graphs, n_permut = 5000, test_size = 0.20, pca = '
"Mental_relax_absChange",
"Abs_diff_automaticity",
]
pred_node_metrics = ['strength','strengthnorm', 'eigenCent', 'localEfficiency']
pred_node_metrics = ['strength','strengthnorm', 'eigenCent'] #'localEfficiency']
#Xmat_names = ["Degree", "Closeness", "Betweenness", "Clustering", "Edge_weights"]
cv_results = dict(pre= dict(), post=dict(), change=dict())
cv_results['phenotype'] = Y
single_ROI_reg = ['Supramarginal gyrus', 'Anterior Cingulate Cortex', 'Cingulate gyrus mid-anterior',' Cingulate cortex posterior'] # PO,
subjects = data.subjects
save_to = data.save_to
atlas_labels = data.atlas_labels

print(r'---CROSS-VAL REGRESSION [Yi ~ Node] METRICS---')



# 1) Extracts metric column for each node (1 x Nodes) and stack them/sub --> (N sub x Nodes) ~ Yi
# 2) Compute CV regression for Yi variable
for node_metric in pred_node_metrics:
feat_mat = pd.DataFrame(np.array([sub_mat[node_metric] for sub_mat in graphs['change_nodes']]), columns = atlas_labels, index = subjects)
cv_results['change'][node_metric] = graphsCV.regression_cv(feat_mat, Y, target_col, n_permut = n_permut, test_size = test_size, pca=pca)
cv_results['change'][node_metric] = graphsCV.regression_cv(feat_mat, Y, target_col, pred_metric_name = node_metric, n_permut = n_permut, test_size = test_size, pca=pca)

for node_metric in pred_node_metrics:
feat_mat = pd.DataFrame(np.array([sub_mat[node_metric] for sub_mat in graphs['post_nodes_metrics']]), columns = atlas_labels, index = subjects)
cv_results['post'][node_metric] = graphsCV.regression_cv(feat_mat, Y, target_col, n_permut = n_permut, test_size = test_size, pca=pca)
cv_results['post'][node_metric] = graphsCV.regression_cv(feat_mat, Y, target_col, pred_metric_name = node_metric, n_permut = n_permut, test_size = test_size, pca=pca)

# Edge based prediction for post and change
edge_feat_mat = graphsCV.edge_attributes2df(cv_results['post'], edge_metric = 'weight')
cv_results['post']['edge_weight'] = graphsCV.regression_cv(edge_feat_mat, Y, target_col, n_permut = n_permut, test_size = test_size, pca=pca)
cv_results['post']['edge_weight'] = graphsCV.regression_cv(edge_feat_mat, Y, target_col, pred_metric_name = 'weight', n_permut = n_permut, test_size = test_size, pca=pca)

edge_feat_mat = graphsCV.edge_attributes2df(cv_results['change'], edge_metric = 'weight')
cv_results['change']['edge_weight'] = graphsCV.regression_cv(edge_feat_mat, Y, target_col, n_permut = n_permut, test_size = test_size, pca=pca)
cv_results['change']['edge_weight'] = graphsCV.regression_cv(edge_feat_mat, Y, target_col, pred_metric_name = 'weight', n_permut = n_permut, test_size = test_size, pca=pca)

# ROI specific multivariate (multi-node metric) prediction
for roi in single_ROI_reg: # compute N sub x M metrics df for prediction of Yi
roi_feat_mat = pd.DataFrame(np.array([sub_mat.loc[roi,:] for sub_mat in graphs['change_nodes']]), columns = pred_node_metrics, index = subjects)
cv_results['change'][roi] = graphsCV.regression_cv(roi_feat_mat, Y, target_col)
cv_results['change'][roi] = graphsCV.regression_cv(roi_feat_mat, Y, target_col, pred_metric_name = roi)

# Save reg results
with open(os.path.join(save_to, "cv_results.pkl"), "wb") as f:
pickle.dump(cv_results, f)
Expand Down
91 changes: 49 additions & 42 deletions src/graphs_regressionCV.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import glob as glob
import pickle
import pandas as pd
import bct.algorithms as bct
import bct.algorithms as bct
from sklearn.model_selection import permutation_test_score
from scipy.stats import pearsonr
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
Expand Down Expand Up @@ -136,7 +136,7 @@ def edge_attributes2df(Gs_dict, edge_metric = 'weight', subjects = False):
'''
if subjects is False:
subjects = Gs_dict.keys()
assert len(subjects) == Gs_dict.keys(), 'Subjects names and Gs_dict items do not match'
#assert len(subjects) == Gs_dict.keys(), 'Subjects names and Gs_dict items do not match'

tuple_ls = [G.edges().data(edge_metric) for G in Gs_dict.values()] # (node 1, node 2, value)
edges_dfs = [] # to append sub i x pairs of single row dfs
Expand Down Expand Up @@ -224,32 +224,33 @@ def hqs_algorithm(e, cov, var, N, seed, return_correlation=False):
else:
return C


resulting_covariance_matrix = hqs_algorithm(e, v, edash, N)
print("Covariance Matrix:")
print(resulting_covariance_matrix)
# Old code can remove if run success /11 march 24

resulting_correlation_matrix = hqs_algorithm(e, v, edash, N, return_correlation=True)
print("\nCorrelation Matrix:")
print(resulting_correlation_matrix)
#resulting_covariance_matrix = hqs_algorithm(e, v, edash, N)
#print("Covariance Matrix:")
#print(resulting_covariance_matrix)

#resulting_correlation_matrix = hqs_algorithm(e, v, edash, N, return_correlation=True)
#print("\nCorrelation Matrix:")
#print(resulting_correlation_matrix)#


# ---Feature matrices based on subjects' graphs---#
subject_names = list(Gs.keys())
node_names = list(Gs[subject_names[0]].nodes())
#subject_names = list(Gs.keys())
#node_names = list(Gs[subject_names[0]].nodes())#

# Reorganize data structure from lists of vectors to pd (N x nodes array) for each dict key
for metric in list(metric_dict.keys()): # Excluding nodes and communities
tmp_data = metric_dict[metric]
if metric != 'communities' and metric != 'nodes':
metric_dict[metric] = pd.DataFrame(
np.array(tmp_data), columns=labels, index=subjects
)
breakpoint() # check data structure for each nodes ( clustering ?)
#maybe save as a df would be easier to compute 2samp t tests
breakpoint()#store the yeo7 id of ROI tocompute network metrics/subjects
return metric_dict # dict w/ keys = metrics and values = list of list vectors (one list per subject)
#for metric in list(metric_dict.keys()): # Excluding nodes and communities
# tmp_data = metric_dict[metric]
# if metric != 'communities' and metric != 'nodes':
# metric_dict[metric] = pd.DataFrame(
# np.array(tmp_data), columns=labels, index=subjects
# )
##breakpoint() # check data structure for each nodes ( clustering ?)
##maybe save as a df would be easier to compute 2samp t tests
##breakpoint()#store the yeo7 id of ROI tocompute network metrics/subjects

#return metric_dict # dict w/ keys = metrics and values = list of list vectors (one list per subject)

def rand_graphs(dict_graphs, subjects, n_permut):
'''
Expand All @@ -260,14 +261,14 @@ def rand_graphs(dict_graphs, subjects, n_permut):
for i, sub in enumerate(subjects):
perms = []
seeds = np.random.randint(0, 1000000, n_permut)

return perm_graphs


def node_metric_diff(post_df, pre_df, subjects):

change_dfs = []
for i, sub in range(len(subjects)):
for i, sub in enumerate(subjects):
assert list(post_df[i].columns) == list(pre_df[i].columns)

temp_df = post_df[i].copy(deep=True)
Expand All @@ -285,7 +286,7 @@ def bootstrap_pvals_df(df_metric_ls, dict_dfs_permut, subjects, mult_comp = 'fdr
for i, sub in enumerate(subjects):
df_metric = df_metric_ls[i]
rand_dist_3d = np.stack(dict_dfs_permut[sub], axis=2) # stack list of 2d rand dfs on 3d dim
assert df_metric.shape == rand_dist.shape[0:2]
assert df_metric.shape == rand_dist_3d.shape[0:2]
#mean_val = np.mean(rand_dist, axis=2)
#td_val = np.std(rand_dist, axis=2)

Expand All @@ -297,7 +298,7 @@ def bootstrap_pvals_df(df_metric_ls, dict_dfs_permut, subjects, mult_comp = 'fdr
p_df[i, j] = np.mean(np.abs(rand_dist_3d[i, j, :]) >= np.abs(cell_value)) # Two-tailed test

p_values_df = pd.DataFrame(p_df, index=df_metric.index, columns=df_metric.columns)

if mult_comp == 'fdr_bh':
for column in p_values_df.columns:
p_values = p_values_df[column]
Expand All @@ -307,7 +308,7 @@ def bootstrap_pvals_df(df_metric_ls, dict_dfs_permut, subjects, mult_comp = 'fdr

if mult_comp == 'fdr_bn':
print('FDR correction applied to change metric p-values')

return pval_dfs_ls


Expand Down Expand Up @@ -336,7 +337,7 @@ def compute_permutation(
n_components=0.80,
n_permutations=5000,
scoring="r2",
random_seed=40
rdm_seed=40
):
"""
Compute the permutation test for a specified metric (r2 by default)
Expand Down Expand Up @@ -372,7 +373,7 @@ def compute_permutation(
scoring=scoring,
cv=cv,
n_permutations=n_permutations,
random_state=random_seed,
random_state=rdm_seed,
)

return score, perm_scores, pvalue
Expand All @@ -386,7 +387,7 @@ def compute_permutation(
from scipy.stats import pearsonr
import numpy as np

def regression_cv(features_matrix, Y, target_columns, rdm_seed=40, n_permut = 5000, test_size = 0.2, pca='80%'):
def regression_cv(features_matrix, Y, target_columns, pred_metric_name = 'X_pred', rdm_seed=40, n_permut = 5000, test_size = 0.2, pca='80%'):

"""
Compute the regression with cross-validation for each graph metric
Expand All @@ -397,18 +398,18 @@ def regression_cv(features_matrix, Y, target_columns, rdm_seed=40, n_permut = 50
metrics_names : list of strings from the graph metrics but written in a way that will figure in the plots outputs.
"""


mean_metrics = []
out_dict = dict()

#out_dict = dict()
result_Yi = dict()

prePipeline = Pipeline([("scaler", StandardScaler())])
feat_mat = prePipeline.fit_transform(features_matrix)
pipeline = Pipeline([("pca", PCA(n_components=pca)), ("reg", Ridge(alpha=1.0))])
# cv = KFold(n_splits=5, random_state=randrdm_seedom_seed, shuffle=True)
cv = ShuffleSplit(n_splits=10, test_size=test_size, random_state=rdm_seed)

for target_column in target_columns:
print(f"--- {target_column} ---")
print(f"[CV-regression] : __{target_column} ~ -{pred_metric_name}__")
y_preds = []
y_tests = []
y = Y[target_column].values
Expand All @@ -420,6 +421,7 @@ def regression_cv(features_matrix, Y, target_columns, rdm_seed=40, n_permut = 50
rmse_scores = []
n_components = []
all_coefficients = []
all_corr_coefficients = []

for train_index, test_index in cv.split(feat_mat):
# Split the data into train and test sets based on the current fold
Expand All @@ -441,11 +443,11 @@ def regression_cv(features_matrix, Y, target_columns, rdm_seed=40, n_permut = 50
rmse = np.sqrt(mse)
cov_x = np.cov(
pipeline.named_steps["pca"]
.transform(X_test)
.transform(X_train)
.transpose()
.astype(np.float64)
)
cov_y = np.cov(y_test.transpose().astype(np.float64))
cov_y = np.cov(y_train.transpose().astype(np.float64))

# Append metrics to the respective lists
pearson_r_scores.append(pearson_r)
Expand All @@ -464,14 +466,18 @@ def regression_cv(features_matrix, Y, target_columns, rdm_seed=40, n_permut = 50
# print('cov_x type and shape', type(cov_x), cov_x.shape)
# print('cov y shape', cov_y.shape)
# print('coeff transposed shape', coefficients.transpose().shape)

# correction from Eqn 6 (Haufe et al., 2014)
corr_coeffs = np.matmul(cov_x, coefficients.transpose()) * (1 / cov_y)
all_coefficients.append(
all_corr_coefficients.append(
pipeline.named_steps["pca"].inverse_transform(
corr_coeffs.transpose()
)
)
all_coefficients.append(
pipeline.named_steps["pca"].inverse_transform(
coefficients.transpose()
)
)

# Permutation tests
r2score, _, r2p_value = compute_permutation(
Expand Down Expand Up @@ -509,7 +515,7 @@ def regression_cv(features_matrix, Y, target_columns, rdm_seed=40, n_permut = 50
# avg_z_score = np.mean(np.array(all_coefficients), axis=0) / np.std(all_coefficients, axis=0)
# print(f"Average z-score = {avg_z_score} std = {np.std(all_coefficients, axis=0)}")
# Plot
plot_title = f"{metrics_name} based CV-prediction of {target_column}"
plot_title = f"{pred_metric_name} based CV-prediction of {target_column}"
# reg_plot_performance(
# y_tests,
# y_preds,
Expand All @@ -536,9 +542,10 @@ def regression_cv(features_matrix, Y, target_columns, rdm_seed=40, n_permut = 50
"rmse_p_value": rmse_p_value,
"y_preds": y_preds,
"y_tests": y_tests,
"corr_coeffs": all_coefficients,
"coeffs" : all_coefficients,
"corr_coeffs": all_corr_coefficients,
}

out_dict[metrics_name] = result_Yi
# out_dict[metrics_name] = result_Yi

return out_dict
return result_Yi

0 comments on commit 500d863

Please sign in to comment.