Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update stats plots, add longitudinal sample size calculation #98

Open
wants to merge 42 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
fdd81a1
update plots to match manuscript
PaulBautin Jan 25, 2021
a1a3d3c
add:
PaulBautin Mar 11, 2021
d48a7b8
- update README for longitudinal study sample size
PaulBautin Mar 11, 2021
645dcc1
update ref in csa_rescale_stat for sample size computation
PaulBautin Mar 11, 2021
f4f5c4b
correct rescale_estimated_subject in README
PaulBautin Mar 11, 2021
79e1269
Create output folder if it does not exist
jcohenadad Mar 17, 2021
08b1d01
change std to var in sample size
PaulBautin Mar 17, 2021
c0f0596
Added TODOs
jcohenadad Mar 17, 2021
deb15c6
Merge remote-tracking branch 'origin/graph' into graph
PaulBautin Mar 18, 2021
de28fc3
- correct sample size formula
PaulBautin Mar 18, 2021
f72e107
- correct diff formula by integrating transformation variability (bef…
PaulBautin May 13, 2021
d39cc25
- correct sample size formula
PaulBautin May 13, 2021
ab54398
- remove replacement in diff formula for df_sub
PaulBautin May 13, 2021
a67f773
- use absolute for mean diff and make difference subject dependent
PaulBautin May 13, 2021
ab1eae1
- remove diff abs because useless (SD uses squared difference anyway)
PaulBautin May 13, 2021
7874439
- remove diff abs because useless
PaulBautin May 13, 2021
e599ab7
- change formula for sample size
PaulBautin May 20, 2021
05d7fcd
- Monte Carlo simulation for between group sample size computation
PaulBautin May 20, 2021
f50f18a
- add comments for sample size function
PaulBautin May 20, 2021
be0ebb4
- improve comments for formula var and var_diff
PaulBautin May 21, 2021
31d2ab1
- limit usage of rescale_area to plots
PaulBautin May 21, 2021
02c47e7
- remove rounding before boxplots for csa and atrophy
PaulBautin May 21, 2021
bb990c8
- scatter colorbar takes discrete values
PaulBautin May 21, 2021
7676fcb
- compute sample size using 500 iterations
PaulBautin May 27, 2021
08ff3a5
- update sample size plot to match article
PaulBautin May 27, 2021
d35b6f2
- remove ceil on each sample size calculation
PaulBautin May 31, 2021
120fa41
add egg info
sandrinebedard Jul 25, 2024
773c191
change sct_deepseg for sct_deepseg_sc
sandrinebedard Jul 25, 2024
e745a3c
change disc file label
sandrinebedard Aug 1, 2024
25524fd
add fix for qform sform and fix typo in disc labels
sandrinebedard Aug 6, 2024
656e174
change for sct
sandrinebedard Aug 8, 2024
c849a24
modify for python 3.9 compatibility
sandrinebedard Aug 13, 2024
9511c21
setup for compute canada
sandrinebedard Aug 13, 2024
96c1b1d
rsync disc file
sandrinebedard Aug 14, 2024
08bfc54
rm qc from sct_deepseg
sandrinebedard Aug 16, 2024
6e36cbf
fix missing extension
sandrinebedard Aug 16, 2024
a5942d4
remove extra QC
sandrinebedard Aug 16, 2024
28b8c78
remove one qc report
sandrinebedard Aug 19, 2024
5d37e0a
remove all QC reports
sandrinebedard Aug 19, 2024
bc3e882
remove all qc reports
sandrinebedard Aug 19, 2024
0b40fb6
add logging
sandrinebedard Aug 27, 2024
d7c5bf9
Merge branch 'sb/update-for-ca-python3.9' into graph
sandrinebedard Aug 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
- change formula for sample size
- add comments
PaulBautin committed May 20, 2021
commit e599ab7cfa1c848ab7469d58c6c394a3005e04e2
65 changes: 38 additions & 27 deletions csa_rescale_stat.py
Original file line number Diff line number Diff line change
@@ -335,7 +335,7 @@ def pearson(df, df_rescale):
df_rescale['p_value_csa'] = p_value_csa
return df_rescale

def sample_size(df, df_rescale):
def sample_size(df, df_sub, df_rescale, itt = 300):
""" Minimum sample size ( number of subjects) necessary to detect an atrophy in a between-subject (based on a
two-sample bilateral t-test) and minimum sample size necessary to detect an atrophy in a
within-subject ( repeated-measures in longitudinal study: based on a two-sample bilateral paired t-test).
@@ -349,26 +349,40 @@ def sample_size(df, df_rescale):
sample_size_90 = []
sample_size_long_80 = []
sample_size_long_90 = []
for rescale_area , group in df.groupby('rescale_area'):
if rescale_area != 100:
# sample size between-subject
CSA_mean_diff = df_rescale.groupby('rescale_area').get_group(rescale_area)['mean_diff'].values[0]
var = df_rescale.groupby('rescale_area').get_group(100)['std_inter'].values[0] ** 2 + df_rescale.groupby('rescale_area').get_group(rescale_area)['std_inter'].values[0] ** 2
sample_size_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var)) / (CSA_mean_diff ** 2)))
sample_size_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var)) / (CSA_mean_diff ** 2)))
# sample size within-subject
var_diff = df_rescale.groupby('rescale_area').get_group(rescale_area)['std_diff'].values[0] ** 2
sample_size_long_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var_diff)) / (CSA_mean_diff ** 2)))
sample_size_long_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var_diff)) / (CSA_mean_diff ** 2)))
else:
sample_size_80.append('inf')
sample_size_90.append('inf')
sample_size_long_80.append('inf')
sample_size_long_90.append('inf')
df_rescale['sample_size_80'] = sample_size_80
df_rescale['sample_size_90'] = sample_size_90
df_rescale['sample_size_long_80'] = sample_size_long_80
df_rescale['sample_size_long_90'] = sample_size_long_90
# Compute mean sample size using a Monte Carlo simulation to evaluate variability of measures
for n in range(itt):
for rescale_r, group_r in df.groupby('rescale'):
for sub, subgroup in group_r.groupby('subject'):
# for each scaling and each subject pick one transformation (Monte-Carlo sample)
df_sub.loc[(df_sub['rescale'] == rescale_r) & (df_sub['subject'] == sub), 'diff'] = df.loc[(df['rescale'] == 1) & (df['subject'] == sub)].sample(n=1)['MEAN(area)'].values[0] - df.loc[(df['rescale'] == rescale_r) & (df['subject'] == sub)].sample(n=1)['MEAN(area)'].values[0]
df_rescale['mean_diff'] = df_sub.groupby('rescale').mean()['diff'].values
df_rescale['std_diff'] = df_sub.groupby('rescale').std()['diff'].values

for rescale, group in df_sub.groupby('rescale'):
if rescale != 1:
CSA_mean_diff = df_sub.groupby('rescale').get_group(1).mean()['mean']*(1-(rescale**2))
# sample size between-subject
var = df_rescale.groupby('rescale').get_group(1)['std_inter'].values[0] ** 2 + df_rescale.groupby('rescale').get_group(rescale)['std_inter'].values[0] ** 2
sample_size_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var)) / (CSA_mean_diff ** 2)))
sample_size_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var)) / (CSA_mean_diff ** 2)))
# sample size within-subject
var_diff = df_rescale.groupby('rescale').get_group(rescale)['std_diff'].values[0] ** 2
sample_size_long_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var_diff)) / (CSA_mean_diff ** 2)))
sample_size_long_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var_diff)) / (CSA_mean_diff ** 2)))
else:
sample_size_80.append(np.inf)
sample_size_90.append(np.inf)
sample_size_long_80.append(np.inf)
sample_size_long_90.append(np.inf)
# Compute mean and SD of computed sample sizes
df_rescale['sample_size_80'] = np.mean(np.reshape(sample_size_80, (itt, -1)), axis=0)
df_rescale['std_sample_size_80'] = np.std(np.reshape(sample_size_80, (itt, -1)), axis=0)
df_rescale['sample_size_90'] = np.mean(np.reshape(sample_size_90, (itt, -1)), axis=0)
df_rescale['std_sample_size_90'] = np.std(np.reshape(sample_size_90, (itt, -1)), axis=0)
df_rescale['sample_size_long_80'] = np.mean(np.reshape(sample_size_long_80, (itt, -1)), axis=0)
df_rescale['std_sample_size_long_80'] = np.std(np.reshape(sample_size_long_80, (itt, -1)), axis=0)
df_rescale['sample_size_long_90'] = np.mean(np.reshape(sample_size_long_90, (itt, -1)), axis=0)
df_rescale['std_sample_size_long_90'] = np.std(np.reshape(sample_size_long_90, (itt, -1)), axis=0)
return df_rescale


@@ -454,11 +468,10 @@ def main():
df_sub['rescale_estimated'] = df_sub['mean'].div(df_sub['csa_without_rescale'])
df_sub['error'] = (df_sub['mean'] - df_sub['theoretic_csa'])
df_sub['perc_error'] = 100 * (df_sub['mean'] - df_sub['theoretic_csa']).div(df_sub['theoretic_csa'])
diff = []
sample = []
for rescale, group in df.groupby('rescale'):
for sub, subgroup in group.groupby('subject'):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add comment/explanation

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added explanations for the sample size function in commit f50f18a

diff.append((df.groupby('rescale').get_group(1).groupby('subject').get_group(sub).sample(n=1)['MEAN(area)'].values - group.groupby('subject').get_group(sub).sample(n=1)['MEAN(area)']).values)
df_sub['diff'] = np.concatenate(diff, axis=0)
df_sub.loc[(df_sub['rescale'] == rescale) & (df_sub['subject'] == sub), 'sample'] = subgroup.sample(n=1)['MEAN(area)'].values
# save dataframe in a csv file
df_sub.to_csv(os.path.join(path_output, r'csa_sub.csv'))

@@ -481,10 +494,8 @@ def main():
df_rescale['mean_perc_error'] = df_sub.groupby('rescale').mean()['perc_error'].values
df_rescale['mean_error'] = df_sub.groupby('rescale').mean()['error'].values
df_rescale['std_perc_error'] = df_sub.groupby('rescale').std()['perc_error'].values
df_rescale['mean_diff'] = df_sub.groupby('rescale').mean()['diff'].values
df_rescale['std_diff'] = df_sub.groupby('rescale').std()['diff'].values
df_rescale = pearson(df_sub, df_rescale)
df_rescale = sample_size(df_sub, df_rescale)
df_rescale = sample_size(df, df_sub, df_rescale)
# save dataframe in a csv file
df_rescale.to_csv(os.path.join(path_output, r'csa_rescale.csv'))
# plot graph if verbose is present