-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlinear_models.py
60 lines (49 loc) · 2.89 KB
/
linear_models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import numpy as np
import sklearn.linear_model
from sklearn.model_selection import train_test_split, ShuffleSplit
from himalaya.ridge import RidgeCV
from lib.stats import compute_all_scores, compute_scores
from lib.stats import comp_dice, comp_corr, r2_score, comp_tsss_similiarity, comp_cosine_similiarity
from lib.stats import compute_batch_differentiability_score
data_dir = "/scratch/users/robert.scholz2/acc_dists/"
#task_data_file = "all_10_tasks_254_full_unrelated.raw.npy"
# subjs=np.loadtxt("data/subjs_hcp254_full_unrelated.txt").astype(int).astype(str);
def predict_y_from_x(xdata, ydata, verbose=0, lmodel="ridge", alphas = "auto"): #, pert=None):
# alpha = 0 ~ similair to linear regression; higher alpha ~ more regularization
x_train, x_test, y_train, y_test = train_test_split(xdata, ydata, test_size=0.2, shuffle=False)
if verbose: print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)
x = x_train.reshape((-1, x_train.shape[-1]))
#if not(pert is None): x= x[:, pert] = 0
y = y_train.reshape((-1, y_train.shape[-1]))
if verbose: print(x.shape, y.shape)
if lmodel == "ridge":
if isinstance(alphas, str) and alphas == "auto":
alphas = approximate_best_alphas(x, y, n_samples = 29696)
if verbose: print("approx. alphas:", alphas);
skr = sklearn.linear_model.Ridge(alpha = alphas) # abest_alphas
elif lmodel == "linear":
skr = sklearn.linear_model.LinearRegression()
else: raise Exception(f"Desired linear model {lmodel} does not exist/is not implemented");
skr.fit(x, y)
y_train_pred =skr.predict(x).reshape(y_train.shape)
xt = x_test.reshape((-1, x_test.shape[-1]))
#if not(pert is None): xt[:, pert] = 0
y_test_pred =skr.predict(xt).reshape(y_test.shape)
return y_train, y_train_pred, y_test, y_test_pred, skr;
def predict_from_modality_file(subjs, predictor_data_file, task_data_file, data_dir= data_dir, resid_task_maps=False):
isubjs, xdata, ydata = load_xy_data(subjs, data_dir, predictor_data_file, task_data_file, resid_task_maps=resid_task_maps)
#n_predictors = xdata.shape[-1]
return (isubjs, ) + predict_y_from_y(isubjs, xdata, ydata);
## just roguhly estimate the alphas on a subset of data
def approximate_best_alphas(x,y, n_samples = 29696, \
test_alphas = np.logspace(1, 20, 20), cv = ShuffleSplit(n_splits=1, test_size=0.5, random_state=36)):
kernel_ridge_cv = RidgeCV(alphas=test_alphas, cv=cv)
indices = (np.random.rand(n_samples)* len(x)).astype(int)
kernel_ridge_cv.fit(x[indices], y[indices])
return kernel_ridge_cv.best_alphas_
def score(y_train_pred, y_train, desired_scores = {"corr": comp_corr, "r2_score": r2_score}):
vpredicted = np.swapaxes(y_train_pred, 1,2)
vtargets = np.swapaxes(y_train, 1,2)
scoresd = compute_all_scores(vtargets, vpredicted, scores = desired_scores)
diff_scores_test = compute_batch_differentiability_score(vtargets, vpredicted, reduce=False)
return scoresd, diff_scores_test