diff --git a/venv/lib/python2.7/site-packages/IPython/core/__init__.py b/build/lib/vmreact-data-visualization/__init__.py
similarity index 100%
rename from venv/lib/python2.7/site-packages/IPython/core/__init__.py
rename to build/lib/vmreact-data-visualization/__init__.py
diff --git a/build/lib/vmreact-data-visualization/vmreact_learning_trajectory_visualization.py b/build/lib/vmreact-data-visualization/vmreact_learning_trajectory_visualization.py
new file mode 100755
index 0000000..da9a26a
--- /dev/null
+++ b/build/lib/vmreact-data-visualization/vmreact_learning_trajectory_visualization.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Mar 5 17:09:19 2018
+@author: dawlat_local
+"""
+import matplotlib.pyplot as plt
+import pandas
+from matplotlib.backends.backend_pdf import PdfPages
+from prettyplotlib import brewer2mpl
+set2 = brewer2mpl.get_map('Set2', 'qualitative', 8).mpl_colors
+set1 = brewer2mpl.get_map('Set1', 'qualitative', 9).mpl_colors
+bmap=brewer2mpl.get_map('Dark2', 'Qualitative',4).mpl_colors
+mpl.rcParams['axes.color_cycle'] = bmap
+#scored csv
+#tp1 tp2 composite scores
+comp=['total_learning', 'corrected_total_learning','learning_rate','proactive_interference','retroactive_interference','forgetting_and_retention']
+comp_2=['total_learning_2', 'corrected_total_learning_2','learning_rate_2','proactive_interference_2','retroactive_interference_2','forgetting_and_retention_2']
+#tp1 tp2 scored data
+y=['trial1', 'trial2', 'trial3', 'trial4', 'trial5', 'listb', 'trial6', 'trial7','total_learning', 'corrected_total_learning','learning_rate','proactive_interference','retroactive_interference','forgetting_and_retention']
+y_2=['trial1_2', 'trial2_2', 'trial3_2', 'trial4_2', 'trial5_2', 'listb_2', 'trial6_2', 'trial7_2']
+columns=[c for c in y[0:6]]
+columns=[c for c in comp]
+columns_2=[c for c in comp_2]
+with PdfPages('/Users/lillyel-said/Desktop/tp1_tp2_patient_learning_trajectories.pdf') as pdf:
+ for index,value in test_df.groupby('subj_id'):
+ val=value.loc[:,'trial1':'trial7']
+ fig, axes = plt.subplots(nrows=1, ncols=1)
+ color = 'blue'
+ fig=pt_all_trials.loc[index][y_2].plot\
+ (ax=axes, y=y_2, subplots=True, fontsize=14, grid=True, yticks=range(0,16), ylim=(0,16),xticks=range(0,8), xlim=(-0.20,7.2),color=color,marker='o', linewidth=3.0, markersize=3.5)
+ fig2=pt_all_trials.loc[index][y].plot\
+ (ax=axes, y=y, subplots=True, fontsize=14, grid=True, yticks=range(0,16), ylim=(0,16),xticks=range(0,8), xlim=(-0.20,7.2), marker='o', linewidth=3.0, markersize=3.5)
+ title= 'Learning Trajectory for Pt: ' + str(index.replace("'", "").replace('.0',""))
+ axes.set_title(title, fontsize=15)
+ axes.legend(['tp1','tp2'])
+ pdf.savefig()
+with PdfPages('/Users/lillyel-said/Desktop/patient_specific_learning_trajectories.pdf') as pdf:
+ for index,value in test_df.groupby('subj_id'):
+ print index,value
+ axes = plt.subplot(111)
+ fig=pt_all_trials.loc[index].astype(float).plot(ax=axes, y=y, subplots=False, fontsize=12, grid=True,yticks=range(0,16), ylim=(0,16), figsize=(14,10), xticks=range(0,6),xlim=(-0.2,4.2), marker='o', linewidth=2, markersize=5,color='purple')
+ fig=pt_all_trials.loc[index].astype(float).plot(ax=axes, y=y, subplots=False, fontsize=16, grid=True,yticks=range(0,16), ylim=(0,16), figsize=(14,10), xticks=range(0,6),xlim=(-0.2,4.2), marker='o', linewidth=2, markersize=5,color='purple')
+ title= 'Patient Learning Trials'
+ axes.set_title(title, fontsize=20)
+ pdf.savefig()
+ plt.cla()
+ plt.clf()
+ plt.close()
+ for ix, value in test_df.groupby('subj_id'):
+ axes2= plt.subplot(121)
+ learning2=pt_all_trials.loc[(ix)][['trial5','trial7']].astype(float).plot(ax=axes2,fontsize=15, subplots=False, grid=True, yticks=range(0,16), figsize=(14,8), ylim=(0,16), xticks=range(0,16), xlim=(-0.2,1.2), marker='o', linewidth=1.75, markersize=5.0, color='#96D38C')
+ axes2.yaxis.set_ylabel=('# of words remembered')
+ pdf.savefig()
+ new_df=pt_all_trials[['trial5','trial7']].astype(int)
+ new_df["diff_7_5"] = pt_all_trials["trial7"].sub(patient_testdf["trial5"].astype(int),axis=0)
+ sorted_df=new_df.sort_values(['diff_7_5'])
+ test_group5_7=[]
+ for idx,val in sorted_df.groupby(level=0):
+ if (val.loc[:,'diff_7_5'] > 0).bool():
+ print idx, '1'
+ test_group5_7.append([idx,1])
+ if (val.loc[:,'diff_7_5'] < -5).bool():
+ print idx, '3'
+ test_group5_7.append([idx,3])
+ else:
+ print idx , '2'
+ test_group5_7.append([idx,2])
+ df_with_groupings=pandas.DataFrame(data=test_group5_7,columns=['subj_id','group'])
+ new_df=new_df.reset_index()
+ merged_df=pandas.merge(new_df, df_with_groupings, on='subj_id',copy=True, indicator=False).reset_index()
+ plt.cla()
+ plt.clf()
+ plt.close()
+#trials 5 scores vs trial 7 delayed scores
+with PdfPages('/Users/lillyel-said/Desktop/patient_specific_learning_trajectories.pdf') as pdf:
+ for index,value in merged_df.groupby('subj_id'):
+ ax2= plt.subplot(121)
+ color = 'coral' if (value['group'] == 1).any() else'skyblue' if (value['group'] == 2).any() else 'lightgreen'
+ learning_to_delay=patient_testdf.loc[(index)][['trial5','trial7']].astype(float).plot(ax=ax2,fontsize=15, grid=True, yticks=range(0,16), figsize=(16,10), ylim=(0,16), xticks=range(0,16), xlim=(-0.2,1.2), marker='o', c=color, subplots=False, linewidth=1.75, markersize=5.0)
+ ax2.set_title='Patient performance grouped based on Trial 5 and 7 scores'
+ handles, labels = axes.get_legend_handles_labels()
+ labels=['group 1: 0+ words remembered', 'group 2: between 0 and -5','group 3: > -5 words remembered' ]
+ display = (0,1,2)
+ ax2.legend([label for i,label in enumerate(labels) if i in display],fontsize=10, bbox_to_anchor=(1.57,1.00), ncol=1)
+ pdf.savefig()
+#trials 5-6 differences
+xtick_labels_5_6=['trial5', 'trial6']
+for index,value in test_df.groupby('subj_id'):
+ val=value.loc[:,'trial1':'trial7']
+ axes1 = plt.subplot(121)
+ fig1=pt_all_trials.loc[(index)][['trial5','trial6']].astype(float).plot(ax=axes1, fontsize=15, grid=True, yticks=range(0,16), figsize=(12,6), ylim=(0,16), xticks=range(0,2), xlim=(-0.2,1.2), marker='.', linewidth=1.5, markersize=5.0, legend=False, color='b')
+ axes1.set_ylabel=('# of words remembered')
+ ax2= plt.subplot(122)
+ learning_to_delay=pt_all_trials.loc[(index)][['trial5','trial7']].astype(float).plot(ax=ax2,fontsize=15, grid=True, yticks=range(0,16), figsize=(12,6), ylim=(0,16), xticks=range(0,16), xlim=(-0.2,1.2), marker='.', linewidth=1.75, markersize=5.0)
+ ax2.legend(fontsize=10,bbox_to_anchor=(1.37,1.10), ncol=1)
diff --git a/venv/lib/python2.7/site-packages/IPython/core/tests/__init__.py b/build/lib/vmreact-master/scripts/__init__.py
similarity index 100%
rename from venv/lib/python2.7/site-packages/IPython/core/tests/__init__.py
rename to build/lib/vmreact-master/scripts/__init__.py
diff --git a/venv/lib/python2.7/site-packages/IPython/extensions/tests/__init__.py b/build/lib/vmreact-master/scripts/grader/__init__.py
similarity index 100%
rename from venv/lib/python2.7/site-packages/IPython/extensions/tests/__init__.py
rename to build/lib/vmreact-master/scripts/grader/__init__.py
diff --git a/build/lib/vmreact-master/scripts/grader/complete_inquisit_output.py b/build/lib/vmreact-master/scripts/grader/complete_inquisit_output.py
new file mode 100755
index 0000000..12b6a03
--- /dev/null
+++ b/build/lib/vmreact-master/scripts/grader/complete_inquisit_output.py
@@ -0,0 +1,54 @@
+import argparse
+import datetime
+import os
+from composite_scores import composite_scores
+from inquisit_demo_summary import demo_and_summary
+from inquisit_demo_summary_newageranges import demo_and_summary_new
+from inquisit_grader import grader
+format = "%Y_%m_%d"
+current_date = datetime.datetime.today()
+date = current_date.strftime(format)
+parser = argparse.ArgumentParser(
+ description='Grades inquisit data, output: frequency counts of responses to demo survey, parsed raw data (all, primacy, recency), scored data (all, primacy, recency), SR responses compiled, subject age ranges and gender, summary ANT scores, word correlations (all, primacy, recency)')
+parser.add_argument('-r', dest='raw_data', help='path to raw data', type=str, required=True)
+parser.add_argument('-d', dest='demo_data', help='demo_csv', type=str, required=True)
+parser.add_argument('-s', dest='summary_data', help='summary csv', type=str, required=True)
+parser.add_argument('-o', dest='output_csv_location', help='path to output folder', type=str, default=os.getcwd())
+args = parser.parse_args()
+if not os.path.isdir(args.output_csv_location):
+ os.mkdir(args.output_csv_location)
+all_subj_data_csv = args.raw_data
+demographic_data = args.demo_data
+final_summary_csv = args.summary_data
+demo_and_summary(all_subj_data_csv, args.demo_data, args.summary_data,
+ os.path.join(args.output_csv_location, 'frequency_counts' + '_' + date + '.csv'),
+ os.path.join(args.output_csv_location, 'subj_age_agerange_gender' + '_' + date + '.csv'),
+ os.path.join(args.output_csv_location, 'sr_responses' + '_' + date + '.csv'),
+ os.path.join(args.output_csv_location, 'summary_ant_scores' + '_' + date + '.csv'))
+demo_and_summary_new(all_subj_data_csv, args.demo_data, os.path.join(args.output_csv_location,
+ 'subj_age_agerange_gender_new_age_bins' + '_' + date + '.csv'))
+grader(all_subj_data_csv, os.path.join(args.output_csv_location, 'parsed_raw_data' + '_' + date + '.csv'),
+ os.path.join(args.output_csv_location, 'scored_data' + '_' + date + '.csv'),
+ os.path.join(args.output_csv_location, 'word_correlations' + '_' + date + '.csv'), 0)
+grader(all_subj_data_csv, os.path.join(args.output_csv_location, 'parsed_raw_data_primacy' + '_' + date + '.csv'),
+ os.path.join(args.output_csv_location, 'scored_data_primacy' + '_' + date + '.csv'),
+ os.path.join(args.output_csv_location, 'word_correlations_primacy' + '_' + date + '.csv'), 1)
+grader(all_subj_data_csv, os.path.join(args.output_csv_location, 'parsed_raw_data_recency' + '_' + date + '.csv'),
+ os.path.join(args.output_csv_location, 'scored_data_recency' + '_' + date + '.csv'),
+ os.path.join(args.output_csv_location, 'word_correlations_recency' + '_' + date + '.csv'), 2)
+scored_data = os.path.join(args.output_csv_location, 'scored_data' + '_' + date + '.csv')
+composite_scores(1, scored_data, os.path.join(args.output_csv_location, 'composite_scores_vakil' + '_' + date + '.csv'))
diff --git a/build/lib/vmreact-master/scripts/grader/composite_scores.py b/build/lib/vmreact-master/scripts/grader/composite_scores.py
new file mode 100755
index 0000000..48d0848
--- /dev/null
+++ b/build/lib/vmreact-master/scripts/grader/composite_scores.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Feb 27 12:04:33 2018
+@author: dawlat_elsaid
+"""
+import pandas
+def composite_scores(get_comp_scores, input_csv, output_csv):
+ scored_data = pandas.read_csv(input_csv)
+ if get_comp_scores == 1:
+ df_trials = scored_data.loc[:, 'subj_id':'trial7']
+ composite_scores = pandas.DataFrame()
+ tmp = pandas.DataFrame()
+ composite_scores[['subj_id', 'list_type']] = df_trials[['subj_id', 'list_type']]
+ composite_scores['total_learning'] = df_trials[['trial1', 'trial2', 'trial3', 'trial4', 'trial5']].apply(
+ lambda row: pandas.np.sum(row), axis=1)
+ tmp['test'] = df_trials[['trial1']] * 5
+ composite_scores['corrected_total_learning'] = composite_scores['total_learning'].subtract(tmp['test'])
+ composite_scores['learning_rate'] = df_trials['trial5'].subtract(df_trials['trial1'], axis='rows')
+ composite_scores['proactive_interference'] = df_trials['trial1'].subtract(scored_data['listb'], axis='rows')
+ composite_scores['retroactive_interference'] = df_trials['trial5'].subtract(df_trials['trial6'], axis='rows')
+ composite_scores['forgetting_and_retention'] = df_trials['trial5'].subtract(df_trials['trial7'], axis='rows')
+ # composite_scores_transposed=composite_scores.transpose()
+ # composite_scores_transposed.to_csv(output_csv,header=True,index=['measure','score'])
+ composite_scores.to_csv(output_csv, header=True, index=['measure', 'score'])
diff --git a/build/lib/vmreact-master/scripts/grader/inquisit_demo_summary.py b/build/lib/vmreact-master/scripts/grader/inquisit_demo_summary.py
new file mode 100755
index 0000000..58a97af
--- /dev/null
+++ b/build/lib/vmreact-master/scripts/grader/inquisit_demo_summary.py
@@ -0,0 +1,170 @@
+import collections
+import csv
+def demo_and_summary(all_subj_data_csv, demographic_data, final_summary_csv, frequency_count, subj_age_agerange_gender,
+ sr_responses, summary_ant_scores):
+ with open(all_subj_data_csv, 'U') as file:
+ input_csv_lines_all_subj = csv.reader(file)
+ input_csv_lines_all_subj = map(list, zip(*input_csv_lines_all_subj))
+ all_subj_csv_lines = dict((rows[0], rows[1:]) for rows in input_csv_lines_all_subj)
+ with open(demographic_data, 'U') as file:
+ input_demo_sr_q_csv = csv.reader(file)
+ input_demo_sr_q_csv = map(list, zip(*input_demo_sr_q_csv))
+ demographic_data = dict((rows[0], rows[1:]) for rows in (input_demo_sr_q_csv))
+ with open(final_summary_csv, 'U') as file:
+ final_summary_lines = csv.reader(file)
+ final_summary_lines = map(list, zip(*final_summary_lines))
+ rey_summary = dict((rows[0], rows[1:]) for rows in (final_summary_lines))
+ age_ranges = {
+ '16-19': range(16, 20, 1),
+ '20-29': range(20, 30, 1),
+ '30-39': range(30, 40, 1),
+ '40-49': range(40, 50, 1),
+ '50-59': range(50, 60, 1),
+ '57-69': range(57, 70, 1),
+ '70-79': range(70, 80, 1),
+ '76-89': range(76, 90, 1)
+ }
+ subj_id_list_demo = []
+ subj_id_only_demo = []
+ for subject in sorted(set(all_subj_csv_lines['subject'])):
+ subj_id_only_demo.append(subject)
+ subj_id_list_combined = [demographic_data['subject'][x] for x in range(len(demographic_data['subject'])) if
+ demographic_data['subject'][x] == subject]
+ subj_id_list_demo.append(subj_id_list_combined)
+ subj_id_combined = [(idx, val) for idx, val in enumerate(sorted(subj_id_only_demo))]
+ subj_val = []
+ key_val_all = []
+ for key in sorted(demographic_data.keys()):
+ for value in sorted(demographic_data[key]):
+ key_val_all.append([key, value])
+ if 'subject' in key:
+ subj_val.append(value)
+ else:
+ continue
+ subj_id_with_index = list()
+ for subj_num in subj_val:
+ subj_combined = [[idx, val] for idx, val in enumerate(sorted(subj_id_only_demo)) if val == subj_num]
+ subj_indexvals = [[idx, val] for idx, val in enumerate(sorted(subj_id_only_demo))]
+ subj_id_with_index.append(subj_combined)
+ new_demo_dict = dict()
+ for key_var in sorted(demographic_data.keys()):
+ if 'latency' not in key_var and 'group' not in key_var and 'build' not in key_var and 'time' not in key_var and 'date' not in key_var:
+ new_demo_dict[key_var] = []
+ for index1, val1 in enumerate(key_val_all):
+ if val1[0] in new_demo_dict.keys():
+ new_demo_dict[val1[0]].append(val1[1])
+ counter_demo_dict = dict()
+ for key_q in sorted(new_demo_dict.keys()):
+ answer_count = collections.Counter(new_demo_dict[key_q])
+ counter_demo_dict[key_q] = answer_count
+ with open(frequency_count, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ writer.writerow(['survey_question', 'response_counts'])
+ for key, value in sorted(counter_demo_dict.items()):
+ writer.writerow([key, value])
+ csvfile.close()
+ subj_age_gender_mem = []
+ x = []
+ for idx2, subj_id in enumerate(subj_id_only_demo):
+ subj_age_gen = [[demographic_data['subject'][x], demographic_data['gender_response'][x].lower(),
+ demographic_data['age_textbox_response'][x]] for x in range(len(demographic_data['subject']))
+ if demographic_data['subject'][x] == subj_id]
+ y = [[demographic_data['subject'][x]] for x in range(len(demographic_data['subject'])) if
+ demographic_data['subject'][x] == subj_id]
+ subj_age_gender_mem.append(subj_age_gen)
+ demo_subj_age_gender = [[demographic_data['subject'][x], demographic_data['gender_response'][x].lower(),
+ demographic_data['age_textbox_response'][x]]
+ for x in range(len(demographic_data['subject']))
+ if demographic_data['subject'][x]]
+ raw_data_responses = [[all_subj_csv_lines['subject'][x], all_subj_csv_lines['trialcode'][x],
+ all_subj_csv_lines['response'][x].lower()]
+ for x in range(len(all_subj_csv_lines['subject']))
+ if 'recall_response' in all_subj_csv_lines['trialcode'][x]]
+ key_val = []
+ for key in age_ranges.keys():
+ for val in age_ranges[key]:
+ key_val.append([key, val])
+ id_age_agerange = []
+ with open(subj_age_agerange_gender, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ writer.writerow(['subj_id', 'gender', 'age', 'age_range'])
+ for subj in sorted(demo_subj_age_gender):
+ subj_from_main_raw_list = []
+ ages = subj[2]
+ gender = subj[1]
+ subj_id_raw = [val for val in raw_data_responses if val[0] == subj[0]]
+ for vals in key_val:
+ age_vals = vals[1]
+ age_vals = str(age_vals)
+ if age_vals == ages:
+ complete_list = subj[0] + ',' + gender + "," + age_vals + "," + vals[0]
+ id_age_agerange.append(complete_list)
+ writer.writerow([subj[0], gender, age_vals, vals[0]])
+ csvfile.close()
+ subj_id_only = []
+ for subject in sorted(set(all_subj_csv_lines['subject'])):
+ subj_id_only.append(subject)
+ subj_id_memory = [subj_mem_trials for subj_mem_trials in subj_id_only]
+ subj_ids_summary = [x for x in rey_summary['script.subjectid']]
+ subj_ids_summary = sorted(subj_ids_summary)
+ summary_key_val = []
+ for key in sorted(rey_summary.keys()):
+ for value in sorted(rey_summary[key]):
+ summary_key_val.append([key, value])
+ new_summary_dict = dict()
+ for sum_key in sorted(rey_summary.keys()):
+ if 'script.starttime' not in sum_key and 'script.startdate' not in sum_key and 'script.elapsedtime' not in sum_key and 'values.trialcount' not in sum_key and 'values.completed' not in sum_key and 'values.trialcount' not in sum_key and 'parameters.min_validlatency' not in sum_key and 'computer.platform' not in sum_key:
+ new_summary_dict[sum_key] = []
+ for sum_idx, sum_val in enumerate(summary_key_val):
+ if sum_val[0] in new_summary_dict.keys():
+ new_summary_dict[sum_val[0]].append(sum_val[1])
+ subject_summary_sr_responses = [[rey_summary['script.subjectid'][x], rey_summary['expressions.gad_7_total'][x],
+ rey_summary['expressions.phq_total'][x],
+ rey_summary['expressions.pcl_4_total'][x],
+ rey_summary['expressions.pcl_total_hybridscore_corrected'][x]] for x in
+ range(len(rey_summary['script.subjectid'])) if
+ rey_summary['values.end_survey_completed'][x] == '1']
+ subject_summary_ant_scores = [
+ [rey_summary['script.subjectid'][x], rey_summary['expressions.overallpercentcorrect'][x],
+ rey_summary['expressions.meanRT'][x], rey_summary['expressions.stdRT'][x]] for x in
+ range(len(rey_summary['script.subjectid'])) if rey_summary['values.end_survey_completed'][x] == '1']
+ with open(sr_responses, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ writer.writerow(['subj_id', 'gad_7', 'phq', 'pcl_dsm4', 'pcl_hybrid'])
+ for responses in sorted(subject_summary_sr_responses):
+ writer.writerow(responses)
+ csvfile.close()
+ with open(summary_ant_scores, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ writer.writerow(['subj_id', 'percent_correct', 'meanRT', 'stdRT'])
+ for scores in sorted(subject_summary_ant_scores):
+ writer.writerow(scores)
+ csvfile.close()
diff --git a/build/lib/vmreact-master/scripts/grader/inquisit_demo_summary_newageranges.py b/build/lib/vmreact-master/scripts/grader/inquisit_demo_summary_newageranges.py
new file mode 100755
index 0000000..8fb6f04
--- /dev/null
+++ b/build/lib/vmreact-master/scripts/grader/inquisit_demo_summary_newageranges.py
@@ -0,0 +1,91 @@
+import csv
+def demo_and_summary_new(all_subj_data_csv, demographic_data, subj_age_agerange_gender):
+ with open(all_subj_data_csv, 'U') as file:
+ input_csv_lines_all_subj = csv.reader(file)
+ input_csv_lines_all_subj = map(list, zip(*input_csv_lines_all_subj))
+ all_subj_csv_lines = dict((rows[0], rows[1:]) for rows in input_csv_lines_all_subj)
+ with open(demographic_data, 'U') as file:
+ input_demo_sr_q_csv = csv.reader(file)
+ input_demo_sr_q_csv = map(list, zip(*input_demo_sr_q_csv))
+ demographic_data = dict((rows[0], rows[1:]) for rows in (input_demo_sr_q_csv))
+ age_ranges = {
+ '20-29': range(20, 30, 1),
+ '30-39': range(30, 40, 1),
+ '40-49': range(40, 50, 1),
+ '50-59': range(50, 60, 1),
+ '60-69': range(60, 70, 1),
+ '70-90': range(70, 90, 1)}
+ subj_id_list_demo = []
+ subj_id_only_demo = []
+ for subject in sorted(set(all_subj_csv_lines['subject'])):
+ subj_id_only_demo.append(subject)
+ subj_id_list_combined = [demographic_data['subject'][x] for x in range(len(demographic_data['subject'])) if
+ demographic_data['subject'][x] == subject]
+ subj_id_list_demo.append(subj_id_list_combined)
+ subj_id_combined = [(idx, val) for idx, val in enumerate(sorted(subj_id_only_demo))]
+ subj_val = []
+ key_val_all = []
+ for key in sorted(demographic_data.keys()):
+ for value in sorted(demographic_data[key]):
+ key_val_all.append([key, value])
+ if 'subject' in key:
+ subj_val.append(value)
+ else:
+ continue
+ subj_id_with_index = list()
+ for subj_num in subj_val:
+ subj_combined = [[idx, val] for idx, val in enumerate(sorted(subj_id_only_demo)) if val == subj_num]
+ subj_indexvals = [[idx, val] for idx, val in enumerate(sorted(subj_id_only_demo))]
+ subj_id_with_index.append(subj_combined)
+ subj_age_gender_mem = []
+ x = []
+ for idx2, subj_id in enumerate(subj_id_only_demo):
+ subj_age_gen = [[demographic_data['subject'][x], demographic_data['gender_response'][x].lower(),
+ demographic_data['age_textbox_response'][x]] for x in range(len(demographic_data['subject']))
+ if demographic_data['subject'][x] == subj_id]
+ y = [[demographic_data['subject'][x]] for x in range(len(demographic_data['subject'])) if
+ demographic_data['subject'][x] == subj_id]
+ subj_age_gender_mem.append(subj_age_gen)
+ demo_subj_age_gender = [[demographic_data['subject'][x], demographic_data['gender_response'][x].lower(),
+ demographic_data['age_textbox_response'][x]]
+ for x in range(len(demographic_data['subject']))
+ if demographic_data['subject'][x]]
+ raw_data_responses = [[all_subj_csv_lines['subject'][x], all_subj_csv_lines['trialcode'][x],
+ all_subj_csv_lines['response'][x].lower()]
+ for x in range(len(all_subj_csv_lines['subject']))
+ if 'recall_response' in all_subj_csv_lines['trialcode'][x]]
+ key_val = []
+ for key in age_ranges.keys():
+ for val in age_ranges[key]:
+ key_val.append([key, val])
+ id_age_agerange = []
+ with open(subj_age_agerange_gender, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ writer.writerow(['subj_id', 'age', 'age_range', 'gender'])
+ for subj in sorted(demo_subj_age_gender):
+ subj_from_main_raw_list = []
+ ages = subj[2]
+ gender = subj[1]
+ subj_id_raw = [val for val in raw_data_responses if val[0] == subj[0]]
+ for vals in key_val:
+ age_vals = vals[1]
+ age_vals = str(age_vals)
+ if age_vals == ages:
+ complete_list = subj[0] + ',' + age_vals + "," + vals[0] + "," + gender
+ id_age_agerange.append(complete_list)
+ writer.writerow([subj[0], age_vals, vals[0], gender])
+ csvfile.close()
diff --git a/build/lib/vmreact-master/scripts/grader/inquisit_grader.py b/build/lib/vmreact-master/scripts/grader/inquisit_grader.py
new file mode 100755
index 0000000..6fd21a6
--- /dev/null
+++ b/build/lib/vmreact-master/scripts/grader/inquisit_grader.py
@@ -0,0 +1,209 @@
+import collections
+import csv
+from difflib import SequenceMatcher
+from math import ceil
+def grader(all_subj_data_csv, data_output_raw_csv, data_output_scored_csv, word_corr, p_r):
+ with open(all_subj_data_csv, 'U') as file:
+ input_csv_lines_all_subj = csv.reader(file)
+ input_csv_lines_all_subj = map(list, zip(*input_csv_lines_all_subj))
+ all_subj_csv_lines = dict((rows[0], rows[1:]) for rows in input_csv_lines_all_subj)
+ subj_listtype = []
+ for idx, row in enumerate(all_subj_csv_lines['subject']):
+ if 'rey_list' in all_subj_csv_lines['trialcode'][idx]:
+ subj_listtype.append([all_subj_csv_lines['subject'][idx], all_subj_csv_lines['trialcode'][idx]])
+ set_subj_listtype = []
+ for subj in subj_listtype:
+ if subj not in set_subj_listtype:
+ set_subj_listtype.append(subj)
+ ## count per list type
+ index_number_resp = dict()
+ for list_type in sorted([x for x in set(all_subj_csv_lines['trialcode']) if 'rey_list' in x]):
+ index_number_resp[list_type] = []
+ for idx, response in enumerate(all_subj_csv_lines['response']):
+ if 'recall_response' in all_subj_csv_lines['trialcode'][idx]:
+ if 'listb' not in all_subj_csv_lines['trialcode'][idx]:
+ index_number_resp[
+ set_subj_listtype[[x[0] for x in set_subj_listtype].index(all_subj_csv_lines['subject'][idx])][
+ 1]].append(response.lower().strip())
+ elif 'listb' in all_subj_csv_lines['trialcode'][idx]:
+ index_number_resp[
+ set_subj_listtype[[x[0] for x in set_subj_listtype].index(all_subj_csv_lines['subject'][idx])][1][
+ :-1] + 'b'].append(response.lower().strip())
+ counter_dict = dict()
+ for list_type in sorted(index_number_resp.keys()):
+ rey_recall_word_count = collections.Counter(index_number_resp[list_type])
+ counter_dict[list_type] = rey_recall_word_count
+ total_response_for_list = dict()
+ for list_type in sorted(index_number_resp.keys()):
+ total_response_for_list[list_type] = sorted(set(index_number_resp[list_type]))
+ if p_r == 0:
+ rey_word_lists = {
+ 'rey_list_presentation_1a': ['drum', 'curtain', 'bell', 'coffee', 'school', 'parent', 'moon', 'garden',
+ 'hat', 'farmer', 'nose', 'turkey', 'color', 'house', 'river'],
+ 'rey_list_presentation_2a': ['pipe', 'wall', 'alarm', 'sugar', 'student', 'mother', 'star', 'painting',
+ 'bag', 'wheat', 'mouth', 'chicken', 'sound', 'door', 'stream'],
+ 'rey_list_presentation_3a': ['violin', 'tree', 'scarf', 'ham', 'suitcase', 'cousin', 'earth', 'stairs',
+ 'dog', 'banana', 'town', 'radio', 'hunter', 'bucket', 'field'],
+ 'rey_list_presentation_4a': ['doll', 'mirror', 'nail', 'sailor', 'heart', 'desert', 'face', 'letter', 'bed',
+ 'machine', 'milk', 'helmet', 'music', 'horse', 'road'],
+ 'rey_list_presentation_1b': ['desk', 'ranger', 'bird', 'shoe', 'stove', 'mountain', 'glasses', 'towel',
+ 'cloud', 'boar', 'lamb', 'gun', 'pencil', 'church', 'fish'],
+ 'rey_list_presentation_2b': ['bench', 'officer', 'cage', 'sock', 'fridge', 'cliff', 'bottle', 'soap',
+ 'sky', 'ship', 'goat', 'bullet', 'paper', 'chapel', 'crab'],
+ 'rey_list_presentation_3b': ['orange', 'table', 'toad', 'corn', 'bus', 'chin', 'bleach', 'soap', 'hotel',
+ 'donkey', 'spider', 'money', 'book', 'soldier', 'padlock'],
+ 'rey_list_presentation_4b': ['dish', 'jester', 'hill', 'coat', 'tool', 'forest', 'perfume', 'ladder',
+ 'girl', 'foot', 'shield', 'pie', 'insect', 'ball', 'car']
+ }
+ elif p_r == 1:
+ rey_word_lists = {'rey_list_presentation_1a': ['drum', 'curtain', 'bell', 'coffee', 'school'],
+ 'rey_list_presentation_2a': ['pipe', 'wall', 'alarm', 'sugar', 'student'],
+ 'rey_list_presentation_3a': ['violin', 'tree', 'scarf', 'ham', 'suitcase'],
+ 'rey_list_presentation_4a': ['doll', 'mirror', 'nail', 'sailor', 'heart'],
+ 'rey_list_presentation_1b': ['desk', 'ranger', 'bird', 'shoe', 'stove'],
+ 'rey_list_presentation_2b': ['bench', 'officer', 'cage', 'sock', 'fridge'],
+ 'rey_list_presentation_3b': ['orange', 'table', 'toad', 'corn', 'bus'],
+ 'rey_list_presentation_4b': ['dish', 'jester', 'hill', 'coat', 'tool']
+ }
+ elif p_r == 2:
+ rey_word_lists = {'rey_list_presentation_1a': ['nose', 'turkey', 'color', 'house', 'river'],
+ 'rey_list_presentation_2a': ['mouth', 'chicken', 'sound', 'door', 'stream'],
+ 'rey_list_presentation_3a': ['town', 'radio', 'hunter', 'bucket', 'field'],
+ 'rey_list_presentation_4a': ['milk', 'helmet', 'music', 'horse', 'road'],
+ 'rey_list_presentation_1b': ['lamb', 'gun', 'pencil', 'church', 'fish'],
+ 'rey_list_presentation_2b': ['goat', 'bullet', 'paper', 'chapel', 'crab'],
+ 'rey_list_presentation_3b': ['spider', 'money', 'book', 'soldier', 'padlock'],
+ 'rey_list_presentation_4b': ['shield', 'pie', 'insect', 'ball', 'car']
+ }
+ with open(word_corr, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ for word_list in sorted(total_response_for_list.keys()):
+ word_corrs = []
+ for word in total_response_for_list[word_list]:
+ wordcorrs = [round(SequenceMatcher(None, word, x).ratio(), 3) for x in rey_word_lists[word_list]]
+ word_corrs.append(wordcorrs)
+ writer.writerow([word, max(wordcorrs), rey_word_lists[word_list][wordcorrs.index(max(wordcorrs))]])
+ csvfile.close()
+ subj_id_list = []
+ subj_only = []
+ for subj in sorted(set(all_subj_csv_lines['subject'])):
+ try:
+ subj_list_type = [all_subj_csv_lines['trialcode'][x] for x in range(len(all_subj_csv_lines['subject']))
+ if (all_subj_csv_lines['subject'][x] == subj) and (
+ 'rey_list_presentation_' in all_subj_csv_lines['trialcode'][x])][0]
+ subj_id_list.append([subj, subj_list_type])
+ subj_only.append(subj)
+ except:
+ print "%s has an error in their data" % subj
+ continue
+ full_raw_data_responses = [[all_subj_csv_lines['subject'][x], all_subj_csv_lines['trialcode'][x],
+ all_subj_csv_lines['response'][x].lower()]
+ for x in range(len(all_subj_csv_lines['subject']))
+ if 'recall_response' in all_subj_csv_lines['trialcode'][x]]
+ all_responses = []
+ repeats = []
+ list_b_all = []
+ list_a_all = []
+ with open(data_output_raw_csv, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ writer.writerow(('subj_id', 'list_type', 'trial', 'response', 'score'))
+ for response in full_raw_data_responses:
+ subj = response[0]
+ list_to_use = [subj_id_list[x][1] for x in range(len(subj_id_list)) if subj_id_list[x][0] == subj][0]
+ list_a_all.append(list_to_use)
+ list_b = list_to_use[:-1] + 'b'
+ list_b_all.append(list_b)
+ if 'listb' in response[1]:
+ if response[2] in rey_word_lists[list_b]:
+ response.append(1)
+ else:
+ if any(n > 0.8 for n in
+ [SequenceMatcher(None, response[2], x).ratio() for x in rey_word_lists[list_b]]):
+ response.append(1)
+ else:
+ response.append(0)
+ new_row = response[0], list_b, response[1].split('_')[0], response[2], response[3]
+ else:
+ if response[2] in rey_word_lists[list_to_use]:
+ response.append(1)
+ else:
+ if any(n > 0.8 for n in
+ [SequenceMatcher(None, response[2], x).ratio() for x in rey_word_lists[list_to_use]]):
+ response.append(1)
+ else:
+ response.append(0)
+ new_row = response[0], list_to_use, response[1].split('_')[0], response[2], response[3]
+ writer.writerow(new_row)
+ all_responses.append(response)
+ rep = new_row
+ repeats.append(rep)
+ csvfile.close()
+ trial_breaks = []
+ trial_lines = [all_responses[y][1] for y in range(0, len(all_responses))]
+ trial_breaks = [i for i, x in enumerate(trial_lines[0:])
+ if x.split('_')[0] != trial_lines[i - 1].split('_')[0]]
+ trial_breaks = trial_breaks + [len(all_responses)]
+ subj_scores = []
+ final = []
+ final_repeats = []
+ for idx, val in enumerate(trial_breaks[:-1]):
+ score = 0
+ word_list = []
+ for line in all_responses[trial_breaks[idx]:trial_breaks[idx + 1]]:
+ if line[3] == 1:
+ score = score + 1
+ word_list.append(line[2])
+ test = []
+ for idx, word in enumerate(word_list):
+ test.append([SequenceMatcher(None, word, x).ratio() for x in
+ [y for idx2, y in enumerate(word_list) if idx != idx2]])
+ repeats = 0
+ for word in test:
+ word_thresholded = [ceil(x) for x in word if x > 0.8]
+ n = sum(word_thresholded)
+ if n != 0:
+ repeats = repeats + (((n * (n + 1)) - 1) / (n + 1))
+ subj_scores.append([line[0], line[1].split('_')[0], score, repeats])
+ with open(data_output_scored_csv, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ writer.writerow(
+ ['subj_id', 'list_type', 'listb', 'trial1', 'trial2', 'trial3', 'trial4', 'trial5', 'trial6', 'trial7',
+ 'listb_#_repeats', 'trial1_#_repeats', 'trial2_#_repeats', 'trial3_#_repeats', 'trial4_#_repeats',
+ 'trial5_#_repeats', 'trial6_#_repeats', 'trial7_#_repeats'])
+ subj_scores = subj_scores + ['placeholder']
+ for idx, scores in enumerate(sorted(subj_scores[:-1])):
+ scored = str(scores[2] - scores[3])
+ repeat_nm = scores[3]
+ final.append(scored)
+ final_repeats.append(repeat_nm)
+ subj_id = [scores[0]]
+ for idx2, val in enumerate(subj_id_list):
+ if subj_id[0] == subj_id_list[idx2][0]:
+ subj_list = subj_id_list[idx2][1].split('_')[3]
+ final_row = subj_id + [subj_list] + final + final_repeats
+ if scores[0] != sorted(subj_scores)[idx + 1][0]:
+ writer.writerow(final_row)
+ final_row = []
+ subj_id = []
+ final = []
+ final_repeats = []
+ csvfile.close()
diff --git a/build/lib/vmreact-master/scripts/grader/run_composite_scoring.py b/build/lib/vmreact-master/scripts/grader/run_composite_scoring.py
new file mode 100755
index 0000000..1b8cc9e
--- /dev/null
+++ b/build/lib/vmreact-master/scripts/grader/run_composite_scoring.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+Created on Tue Feb 27 12:04:33 2018
+@author: dawlat_local
+import datetime
+import os
+import sys
+from composite_scores import composite_scores
+input_csv = sys.argv[1]
+output_path = sys.argv[2]
+format = "%Y_%m_%d"
+current_date = datetime.datetime.today()
+date = current_date.strftime(format)
+output_csv = os.path.join(output_path, 'composite_scores_vakil' + '_' + date + '.csv')
+composite_scores(1, input_csv, output_csv)
diff --git a/build/lib/vmreact-merges/2vmreact_compilation.py b/build/lib/vmreact-merges/2vmreact_compilation.py
new file mode 100755
index 0000000..027da57
--- /dev/null
+++ b/build/lib/vmreact-merges/2vmreact_compilation.py
@@ -0,0 +1,572 @@
+# coding: utf-8
+# In[24]:
+import datetime
+from difflib import SequenceMatcher
+from glob import glob
+from math import ceil
+from shutil import copy, move
+import pandas as pd
+from IPython.display import display
+# #Grading Script
+# In[25]:
+def grader(all_subj_data_csv, data_output_raw_csv, data_output_scored_csv, word_corr, p_r):
+ # Grade word-recall responses from a raw export and write three CSV files.
+ #   all_subj_data_csv      -- input CSV; the 'subject', 'trialcode' and 'response' columns are read
+ #   data_output_raw_csv    -- output: one row per recall response with a 0/1 score
+ #   data_output_scored_csv -- output: per-subject rows of trial scores and repeat counts
+ #   word_corr              -- output: best fuzzy match of each distinct response against its word list
+ #   p_r                    -- picks the target lists: 0 = all 15 words, 1 = first five, 2 = last five
+ # Returns None.
+ # NOTE(review): any other p_r leaves rey_word_lists unbound, so its first use raises NameError.
+ # NOTE(review): csv and collections are imported further down this file, after this definition.
+ # Read the whole CSV and transpose it into {column header: [values, ...]}.
+ with open(all_subj_data_csv, 'U') as file:
+ input_csv_lines_all_subj = csv.reader(file)
+ input_csv_lines_all_subj = map(list, zip(*input_csv_lines_all_subj))
+ all_subj_csv_lines = dict((rows[0], rows[1:]) for rows in input_csv_lines_all_subj)
+ # Collect [subject, trialcode] for every row whose trialcode contains 'rey_list',
+ # then de-duplicate while keeping first-seen order.
+ subj_listtype = []
+ for idx, row in enumerate(all_subj_csv_lines['subject']):
+ if 'rey_list' in all_subj_csv_lines['trialcode'][idx]:
+ subj_listtype.append([all_subj_csv_lines['subject'][idx], all_subj_csv_lines['trialcode'][idx]])
+ set_subj_listtype = []
+ for subj in subj_listtype:
+ if subj not in set_subj_listtype:
+ set_subj_listtype.append(subj)
+ ## count per list type
+ index_number_resp = dict()
+ for list_type in sorted([x for x in set(all_subj_csv_lines['trialcode']) if 'rey_list' in x]):
+ index_number_resp[list_type] = []
+ # Bucket each recall response (lower-cased, stripped) under the list type of the first
+ # matching subject entry; 'listb' recalls go under that key with its last character replaced by 'b'.
+ for idx, response in enumerate(all_subj_csv_lines['response']):
+ if 'recall_response' in all_subj_csv_lines['trialcode'][idx]:
+ if 'listb' not in all_subj_csv_lines['trialcode'][idx]:
+ index_number_resp[
+ set_subj_listtype[[x[0] for x in set_subj_listtype].index(all_subj_csv_lines['subject'][idx])][
+ 1]].append(response.lower().strip())
+ elif 'listb' in all_subj_csv_lines['trialcode'][idx]:
+ index_number_resp[
+ set_subj_listtype[[x[0] for x in set_subj_listtype].index(all_subj_csv_lines['subject'][idx])][1][
+ :-1] + 'b'].append(response.lower().strip())
+ # Per-list response frequencies. NOTE(review): counter_dict is not read again in this function.
+ counter_dict = dict()
+ for list_type in sorted(index_number_resp.keys()):
+ rey_recall_word_count = collections.Counter(index_number_resp[list_type])
+ counter_dict[list_type] = rey_recall_word_count
+ # Distinct responses per list, sorted.
+ total_response_for_list = dict()
+ for list_type in sorted(index_number_resp.keys()):
+ total_response_for_list[list_type] = sorted(set(index_number_resp[list_type]))
+ # Target word lists keyed by trialcode; p_r selects full lists or a five-word subset.
+ if p_r == 0:
+ rey_word_lists = {
+ 'rey_list_presentation_1a': ['drum', 'curtain', 'bell', 'coffee', 'school', 'parent', 'moon', 'garden',
+ 'hat', 'farmer', 'nose', 'turkey', 'color', 'house', 'river'],
+ 'rey_list_presentation_2a': ['pipe', 'wall', 'alarm', 'sugar', 'student', 'mother', 'star', 'painting',
+ 'bag', 'wheat', 'mouth', 'chicken', 'sound', 'door', 'stream'],
+ 'rey_list_presentation_3a': ['violin', 'tree', 'scarf', 'ham', 'suitcase', 'cousin', 'earth', 'stairs',
+ 'dog', 'banana', 'town', 'radio', 'hunter', 'bucket', 'field'],
+ 'rey_list_presentation_4a': ['doll', 'mirror', 'nail', 'sailor', 'heart', 'desert', 'face', 'letter', 'bed',
+ 'machine', 'milk', 'helmet', 'music', 'horse', 'road'],
+ 'rey_list_presentation_1b': ['desk', 'ranger', 'bird', 'shoe', 'stove', 'mountain', 'glasses', 'towel',
+ 'cloud', 'boar', 'lamb', 'gun', 'pencil', 'church', 'fish'],
+ 'rey_list_presentation_2b': ['bench', 'officer', 'cage', 'sock', 'fridge', 'cliff', 'bottle', 'soap',
+ 'sky', 'ship', 'goat', 'bullet', 'paper', 'chapel', 'crab'],
+ 'rey_list_presentation_3b': ['orange', 'table', 'toad', 'corn', 'bus', 'chin', 'bleach', 'soap', 'hotel',
+ 'donkey', 'spider', 'money', 'book', 'soldier', 'padlock'],
+ 'rey_list_presentation_4b': ['dish', 'jester', 'hill', 'coat', 'tool', 'forest', 'perfume', 'ladder',
+ 'girl', 'foot', 'shield', 'pie', 'insect', 'ball', 'car']
+ }
+ elif p_r == 1:
+ rey_word_lists = {'rey_list_presentation_1a': ['drum', 'curtain', 'bell', 'coffee', 'school'],
+ 'rey_list_presentation_2a': ['pipe', 'wall', 'alarm', 'sugar', 'student'],
+ 'rey_list_presentation_3a': ['violin', 'tree', 'scarf', 'ham', 'suitcase'],
+ 'rey_list_presentation_4a': ['doll', 'mirror', 'nail', 'sailor', 'heart'],
+ 'rey_list_presentation_1b': ['desk', 'ranger', 'bird', 'shoe', 'stove'],
+ 'rey_list_presentation_2b': ['bench', 'officer', 'cage', 'sock', 'fridge'],
+ 'rey_list_presentation_3b': ['orange', 'table', 'toad', 'corn', 'bus'],
+ 'rey_list_presentation_4b': ['dish', 'jester', 'hill', 'coat', 'tool']
+ }
+ elif p_r == 2:
+ rey_word_lists = {'rey_list_presentation_1a': ['nose', 'turkey', 'color', 'house', 'river'],
+ 'rey_list_presentation_2a': ['mouth', 'chicken', 'sound', 'door', 'stream'],
+ 'rey_list_presentation_3a': ['town', 'radio', 'hunter', 'bucket', 'field'],
+ 'rey_list_presentation_4a': ['milk', 'helmet', 'music', 'horse', 'road'],
+ 'rey_list_presentation_1b': ['lamb', 'gun', 'pencil', 'church', 'fish'],
+ 'rey_list_presentation_2b': ['goat', 'bullet', 'paper', 'chapel', 'crab'],
+ 'rey_list_presentation_3b': ['spider', 'money', 'book', 'soldier', 'padlock'],
+ 'rey_list_presentation_4b': ['shield', 'pie', 'insect', 'ball', 'car']
+ }
+ # For every distinct response write: the response, its highest SequenceMatcher ratio
+ # (rounded to 3 places) against the list's words, and the word that produced it.
+ with open(word_corr, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ for word_list in sorted(total_response_for_list.keys()):
+ word_corrs = []
+ for word in total_response_for_list[word_list]:
+ wordcorrs = [round(SequenceMatcher(None, word, x).ratio(), 3) for x in rey_word_lists[word_list]]
+ word_corrs.append(wordcorrs)
+ writer.writerow([word, max(wordcorrs), rey_word_lists[word_list][wordcorrs.index(max(wordcorrs))]])
+ # NOTE(review): the `with` statement already closes the file; this explicit close looks redundant.
+ csvfile.close()
+ # Map each subject to the first trialcode containing 'rey_list_presentation_'.
+ subj_id_list = []
+ subj_only = []
+ for subj in sorted(set(all_subj_csv_lines['subject'])):
+ try:
+ subj_list_type = [all_subj_csv_lines['trialcode'][x] for x in range(len(all_subj_csv_lines['subject']))
+ if (all_subj_csv_lines['subject'][x] == subj) and (
+ 'rey_list_presentation_' in all_subj_csv_lines['trialcode'][x])][0]
+ subj_id_list.append([subj, subj_list_type])
+ subj_only.append(subj)
+ # NOTE(review): bare except -- the expected failure is the IndexError from [0] on an empty
+ # list, but this also hides every other exception (including KeyboardInterrupt).
+ except:
+ print "%s has an error in their data" % subj
+ continue
+ # [subject, trialcode, lower-cased response] for every recall-response row.
+ full_raw_data_responses = [[all_subj_csv_lines['subject'][x], all_subj_csv_lines['trialcode'][x],
+ all_subj_csv_lines['response'][x].lower()]
+ for x in range(len(all_subj_csv_lines['subject']))
+ if 'recall_response' in all_subj_csv_lines['trialcode'][x]]
+ all_responses = []
+ repeats = []
+ list_b_all = []
+ list_a_all = []
+ # Score each response: 1 if it is in the target list or any SequenceMatcher ratio against
+ # a list word exceeds 0.8, otherwise 0. The score is appended to the response in place.
+ with open(data_output_raw_csv, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ writer.writerow(('subj_id', 'list_type', 'trial', 'response', 'score'))
+ for response in full_raw_data_responses:
+ subj = response[0]
+ # NOTE(review): a subject skipped by the except branch above has no entry here, so [0] raises IndexError.
+ list_to_use = [subj_id_list[x][1] for x in range(len(subj_id_list)) if subj_id_list[x][0] == subj][0]
+ list_a_all.append(list_to_use)
+ list_b = list_to_use[:-1] + 'b'
+ list_b_all.append(list_b)
+ if 'listb' in response[1]:
+ if response[2] in rey_word_lists[list_b]:
+ response.append(1)
+ else:
+ if any(n > 0.8 for n in
+ [SequenceMatcher(None, response[2], x).ratio() for x in rey_word_lists[list_b]]):
+ response.append(1)
+ else:
+ response.append(0)
+ new_row = response[0], list_b, response[1].split('_')[0], response[2], response[3]
+ else:
+ if response[2] in rey_word_lists[list_to_use]:
+ response.append(1)
+ else:
+ if any(n > 0.8 for n in
+ [SequenceMatcher(None, response[2], x).ratio() for x in rey_word_lists[list_to_use]]):
+ response.append(1)
+ else:
+ response.append(0)
+ new_row = response[0], list_to_use, response[1].split('_')[0], response[2], response[3]
+ writer.writerow(new_row)
+ all_responses.append(response)
+ rep = new_row
+ repeats.append(rep)
+ csvfile.close()
+ # Indices where the trial label (trialcode text before the first '_') changes.
+ # NOTE(review): for i == 0, trial_lines[i - 1] is the LAST element, so index 0 is recorded
+ # as a break only when the first and last trial labels differ.
+ trial_breaks = []
+ trial_lines = [all_responses[y][1] for y in range(0, len(all_responses))]
+ trial_breaks = [i for i, x in enumerate(trial_lines[0:])
+ if x.split('_')[0] != trial_lines[i - 1].split('_')[0]]
+ trial_breaks = trial_breaks + [len(all_responses)]
+ subj_scores = []
+ final = []
+ final_repeats = []
+ # Per trial segment: count responses scored 1 and estimate repeated words from the
+ # pairwise similarity of the collected words.
+ for idx, val in enumerate(trial_breaks[:-1]):
+ score = 0
+ word_list = []
+ for line in all_responses[trial_breaks[idx]:trial_breaks[idx + 1]]:
+ if line[3] == 1:
+ score = score + 1
+ word_list.append(line[2])
+ test = []
+ # NOTE(review): this loop reuses the name idx from the enclosing loop.
+ for idx, word in enumerate(word_list):
+ test.append([SequenceMatcher(None, word, x).ratio() for x in
+ [y for idx2, y in enumerate(word_list) if idx != idx2]])
+ repeats = 0
+ for word in test:
+ # Each ratio above 0.8 is ceil'ed to 1, so n counts the other words that are near-duplicates.
+ word_thresholded = [ceil(x) for x in word if x > 0.8]
+ n = sum(word_thresholded)
+ if n != 0:
+ # NOTE(review): whether this is true or floor division depends on the type ceil() returns -- confirm.
+ repeats = repeats + (((n * (n + 1)) - 1) / (n + 1))
+ subj_scores.append([line[0], line[1].split('_')[0], score, repeats])
+ # Write one row per subject: scores (score minus repeats, as strings) followed by repeat counts.
+ with open(data_output_scored_csv, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ writer.writerow(
+ ['subj_id', 'list_type', 'listb', 'trial1', 'trial2', 'trial3', 'trial4', 'trial5', 'trial6', 'trial7',
+ 'listb_#_repeats', 'trial1_#_repeats', 'trial2_#_repeats', 'trial3_#_repeats', 'trial4_#_repeats',
+ 'trial5_#_repeats', 'trial6_#_repeats', 'trial7_#_repeats'])
+ # Sentinel so the look-ahead at idx + 1 below has an element after the last real entry.
+ # NOTE(review): sorting a list that mixes lists and a str relies on Python 2 ordering rules.
+ subj_scores = subj_scores + ['placeholder']
+ for idx, scores in enumerate(sorted(subj_scores[:-1])):
+ scored = str(scores[2] - scores[3])
+ repeat_nm = scores[3]
+ final.append(scored)
+ final_repeats.append(repeat_nm)
+ subj_id = [scores[0]]
+ for idx2, val in enumerate(subj_id_list):
+ if subj_id[0] == subj_id_list[idx2][0]:
+ # Fourth '_'-separated piece of the trialcode, e.g. '1a' from 'rey_list_presentation_1a'.
+ subj_list = subj_id_list[idx2][1].split('_')[3]
+ final_row = subj_id + [subj_list] + final + final_repeats
+ # Flush the accumulated row when the next sorted entry belongs to a different subject.
+ # NOTE(review): sorted(subj_scores) is recomputed on every iteration.
+ if scores[0] != sorted(subj_scores)[idx + 1][0]:
+ writer.writerow(final_row)
+ final_row = []
+ subj_id = []
+ final = []
+ final_repeats = []
+ csvfile.close()
+# #demo and age range function
+# In[26]:
+import os
+import csv
+import collections
+def demo_and_summary_new(all_subj_data_csv, demographic_data, subj_age_agerange_gender):
+ with open(all_subj_data_csv, 'U') as file:
+ input_csv_lines_all_subj = csv.reader(file)
+ input_csv_lines_all_subj = map(list, zip(*input_csv_lines_all_subj))
+ all_subj_csv_lines = dict((rows[0], rows[1:]) for rows in input_csv_lines_all_subj)
+ with open(demographic_data, 'U') as file:
+ input_demo_sr_q_csv = csv.reader(file)
+ input_demo_sr_q_csv = map(list, zip(*input_demo_sr_q_csv))
+ demographic_data = dict((rows[0], rows[1:]) for rows in (input_demo_sr_q_csv))
+ age_ranges = {
+ '20-29': range(20, 30, 1),
+ '30-39': range(30, 40, 1),
+ '40-49': range(40, 50, 1),
+ '50-59': range(50, 60, 1),
+ '60-69': range(60, 70, 1),
+ '70-90': range(70, 90, 1)}
+ subj_id_list_demo = []
+ subj_id_only_demo = []
+ for subject in sorted(set(all_subj_csv_lines['subject'])):
+ subj_id_only_demo.append(subject)
+ subj_id_list_combined = [demographic_data['subject'][x] for x in range(len(demographic_data['subject'])) if
+ demographic_data['subject'][x] == subject]
+ subj_id_list_demo.append(subj_id_list_combined)
+ subj_id_combined = [(idx, val) for idx, val in enumerate(sorted(subj_id_only_demo))]
+ subj_val = []
+ key_val_all = []
+ for key in sorted(demographic_data.keys()):
+ for value in sorted(demographic_data[key]):
+ key_val_all.append([key, value])
+ if 'subject' in key:
+ subj_val.append(value)
+ else:
+ continue
+ subj_id_with_index = list()
+ for subj_num in subj_val:
+ subj_combined = [[idx, val] for idx, val in enumerate(sorted(subj_id_only_demo)) if val == subj_num]
+ subj_indexvals = [[idx, val] for idx, val in enumerate(sorted(subj_id_only_demo))]
+ subj_id_with_index.append(subj_combined)
+ subj_age_gender_mem = []
+ x = []
+ for idx2, subj_id in enumerate(subj_id_only_demo):
+ subj_age_gen = [[demographic_data['subject'][x], demographic_data['gender_response'][x].lower(),
+ demographic_data['age_textbox_response'][x]] for x in range(len(demographic_data['subject']))
+ if demographic_data['subject'][x] == subj_id]
+ y = [[demographic_data['subject'][x]] for x in range(len(demographic_data['subject'])) if
+ demographic_data['subject'][x] == subj_id]
+ subj_age_gender_mem.append(subj_age_gen)
+ demo_subj_age_gender = [[demographic_data['subject'][x], demographic_data['gender_response'][x].lower(),
+ demographic_data['age_textbox_response'][x]]
+ for x in range(len(demographic_data['subject']))
+ if demographic_data['subject'][x]]
+ raw_data_responses = [[all_subj_csv_lines['subject'][x], all_subj_csv_lines['trialcode'][x],
+ all_subj_csv_lines['response'][x].lower()]
+ for x in range(len(all_subj_csv_lines['subject']))
+ if 'recall_response' in all_subj_csv_lines['trialcode'][x]]
+ key_val = []
+ for key in age_ranges.keys():
+ for val in age_ranges[key]:
+ key_val.append([key, val])
+ id_age_agerange = []
+ with open(subj_age_agerange_gender, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ writer.writerow(['subj_id', 'age', 'age_range', 'gender'])
+ for subj in sorted(demo_subj_age_gender):
+ subj_from_main_raw_list = []
+ ages = subj[2]
+ gender = subj[1]
+ subj_id_raw = [val for val in raw_data_responses if val[0] == subj[0]]
+ for vals in key_val:
+ age_vals = vals[1]
+ age_vals = str(age_vals)
+ if age_vals == ages:
+ complete_list = subj[0] + ',' + age_vals + "," + vals[0] + "," + gender
+ id_age_agerange.append(complete_list)
+ writer.writerow([subj[0], age_vals, vals[0], gender])
+ csvfile.close()
+# In[51]:
+format = "%Y_%m_%d"
+current_date = datetime.datetime.today()
+date = current_date.strftime(format)
+output_csv_location = '/Users/lillyel-said/Desktop/vmreact/output/'
+for raw in glob('/Users/lillyel-said/Desktop/vmreact/vmreact/1_rawdata/*/*raw.csv'):
+ raw_data = raw
+ demo_data = raw.replace('raw.csv', 'demo.csv')
+ summary_data = raw.replace('raw.csv', 'summary.csv')
+ prefix = 'mturk_' + os.path.basename(os.path.dirname(raw_data)).split('_')[1] + '_'
+ grader(raw_data, os.path.join(output_csv_location, prefix + 'parsed_raw_data.csv'),
+ os.path.join(output_csv_location, prefix + 'scored_data.csv'),
+ os.path.join(output_csv_location, prefix + 'word_correlations.csv'), 0)
+ grader(raw_data, os.path.join(output_csv_location, prefix + 'parsed_raw_data_primacy.csv'),
+ os.path.join(output_csv_location, prefix + 'scored_data_primacy.csv'),
+ os.path.join(output_csv_location, prefix + 'word_correlations_primacy.csv'), 1)
+ grader(raw_data, os.path.join(output_csv_location, prefix + 'parsed_raw_data_recency.csv'),
+ os.path.join(output_csv_location, prefix + 'scored_data_recency.csv'),
+ os.path.join(output_csv_location, prefix + 'word_correlations_recency.csv'), 2)
+ demo_and_summary_new(raw_data, demo_data, os.path.join(output_csv_location, prefix + 'age_range_gender.csv'))
+ copy(demo_data, os.path.join(output_csv_location, prefix + 'demo.csv'))
+ copy(summary_data, os.path.join(output_csv_location, prefix + 'summary.csv'))
+# In[13]:
+scored_dir = '/Users/lillyel-said/Desktop/vmreact/output/'
+for scored_csv in glob(os.path.join(scored_dir, '*scored*')):
+ with open(scored_csv, 'U') as source:
+ rdr = csv.reader(source)
+ with open(os.path.join(scored_dir, 'tmp.csv'), 'wb') as result:
+ wtr = csv.writer(result)
+ for r in rdr:
+ wtr.writerow(r[0:18])
+ move(os.path.join(scored_dir, 'tmp.csv'), scored_csv)
+ print scored_csv
+# In[ ]:
+# Getting composite scores from scored
+# In[14]:
+import pandas
+import os
+def composite_scores(input_csv, output_csv):
+ scored_data = pandas.read_csv(input_csv)
+ print input_csv
+ df_trials = scored_data.loc[:, 'trial1':'trial7']
+ print df_trials.columns.tolist()
+ composite_scores = pandas.DataFrame()
+ tmp = pandas.DataFrame()
+ composite_scores['total_learning'] = df_trials[['trial1', 'trial2', 'trial3', 'trial4', 'trial5']].apply(
+ lambda row: np.sum(row), axis=1)
+ tmp['test'] = df_trials['trial1'].tolist() * 5
+ composite_scores['corrected_total_learning'] = composite_scores['total_learning'].subtract(tmp['test'])
+ composite_scores['learning_rate'] = df_trials['trial5'].subtract(df_trials['trial1'], axis='rows')
+ composite_scores['proactive_interference'] = df_trials['trial1'].subtract(scored_data['listb'], axis='rows')
+ composite_scores['retroactive_interference'] = df_trials['trial5'].subtract(df_trials['trial6'], axis='rows')
+ composite_scores['forgetting_and_retention'] = df_trials['trial5'].subtract(df_trials['trial7'], axis='rows')
+ composite_scores_transposed = composite_scores.transpose()
+ composite_scores_transposed.to_csv(output_csv, header=True, index=['measure', 'score'])
+ composite_scores.to_csv(output_csv, header=True, index=['measure', 'score'])
+for scored in glob('/Users/lillyel-said/Desktop/vmreact/output/*_scored_data.csv'):
+ composite_scores(scored, scored.replace('_scored_data.csv', '_composite_scores.csv'))
+# In[7]:
+# Collect, across batches 1-8, the union of column names from the demo and clinical ('_end') CSVs.
+# demo_cols, clin_raw_cols and sum_cols are read again by the per-batch compile loop later in this file.
+scored_dir = '/Users/lillyel-said/Desktop/vmreact/output/'
+demo_cols = []
+clin_raw_cols = []
+sum_cols = ['script.startdate', 'script.starttime', 'subject',
+ 'expressions.gad_7_total', 'expressions.phq_total', 'expressions.pcl_4_total',
+ 'expressions.pcl_total_hybridscore_corrected', 'expressions.pcl_total_hybridscore_uncorrected']
+scored_cols = ['subj_id', 'list_type', 'listb', 'trial1', 'trial2', 'trial3',
+ 'trial4', 'trial5', 'trial6', 'trial7', 'listb_#_repeats', 'trial1_#_repeats', 'trial2_#_repeats',
+ 'trial3_#_repeats', 'trial4_#_repeats', 'trial5_#_repeats', 'trial6_#_repeats', 'trial7_#_repeats']
+composite_cols = ['subject', 'total_learning', 'corrected_total_learning', 'learning_rate',
+ 'proactive_interference', 'retroactive_interference', 'forgetting_and_retention']
+age_range_gender_cols = ['age_range']
+for batch in range(1, 9):
+ batch = str(batch)
+ # Per-batch input files, all expected in scored_dir.
+ demo = os.path.join(scored_dir, 'mturk_batch' + batch + '_demo.csv')
+ clin_raw = os.path.join(scored_dir, 'mturk_batch' + batch + '_end.csv')
+ summ = os.path.join(scored_dir, 'mturk_batch' + batch + '_summary.csv')
+ scored = os.path.join(scored_dir, 'mturk_batch' + batch + '_scored_data.csv')
+ composite = os.path.join(scored_dir, 'mturk_batch' + batch + '_composite_scores.csv')
+ age_range_gender_csv = os.path.join(scored_dir, 'mturk_batch' + batch + '_age_range_gender.csv')
+ demo_df = pd.read_csv(demo, dtype=str)
+ # demo_cols.extend([x for x in demo_df.columns.tolist() if ('latency' not in x and 'online' not in x and 'Unnamed' not in x and 'time_comp' not in x and 'subj_id' not in x)])
+ # NOTE(review): the trailing 'age_textbox' operand is a non-empty string and therefore always
+ # true, so it filters nothing -- possibly meant "'age_textbox' not in x"; confirm intent.
+ demo_cols.extend([x for x in demo_df.columns.tolist() if
+ ('latency' not in x and 'Unnamed' not in x and 'subj_id' not in x and 'age_textbox')])
+ print batch
+ age_range_df = pd.read_csv(age_range_gender_csv)
+ # Keep only column names containing none of 'age', 'subj_id', 'gender'.
+ age_range_gender_cols.extend(
+ [x for x in age_range_df.columns.tolist() if ('age' not in x and 'subj_id' not in x and 'gender' not in x)])
+ clin_raw_df = pd.read_csv(clin_raw, dtype=str)
+ clin_raw_cols.extend(
+ [x for x in clin_raw_df.columns.tolist() if 'latency' not in x and 'end' not in x and 'Unnamed' not in x])
+ # NOTE(review): the four frames below are loaded but not used again in this loop;
+ # the reads still fail if a file is missing.
+ sum_df = pd.read_csv(summ, dtype=str)
+ scored_df = pd.read_csv(scored, dtype=str)
+ comp_df = pd.read_csv(composite, dtype=str).rename(index=str, columns={'Unnamed: 0': 'subject'})
+ age_range_gender = pd.read_csv(age_range_gender_csv, dtype=str)
+# De-duplicate; converting through set() does not preserve the collected order.
+demo_cols = list(set(demo_cols))
+clin_raw_cols = list(set(clin_raw_cols))
+print demo_cols
+print clin_raw_cols
+# need to get latency values,
+# use the scored to set the subject ids.
+# append composite to scored_cols since they're in the same order and composite doesn't have subject ids
+# summary - use script.subjectid
+# demo - use subject
+# clin_raw - use subject
+# In[95]:
+import numpy as np
+# For each batch 1-8, merge demo, summary, clinical, scored, composite, primacy/recency and
+# latency data on the subject id and write mturk_batch<N>_compiled.csv into scored_dir.
+# Relies on demo_cols, clin_raw_cols and sum_cols defined earlier in this file.
+scored_dir = '/Users/lillyel-said/Desktop/vmreact/vmreact/2_vmreact/'
+latency_csv = os.path.join(scored_dir, 'vmreact_latency_summary.csv')
+for batch in range(1, 9):
+ # for batch in [8]:
+ batch_df = pd.DataFrame()
+ batch = str(batch)
+ print 'mturk_batch' + batch
+ demo = os.path.join(scored_dir, 'mturk_batch' + batch + '_demo.csv')
+ clin_raw = os.path.join(scored_dir, 'mturk_batch' + batch + '_end.csv')
+ # NOTE(review): this name shadows the built-in sum() for the rest of the module.
+ sum = os.path.join(scored_dir, 'mturk_batch' + batch + '_summary.csv')
+ scored = os.path.join(scored_dir, 'mturk_batch' + batch + '_scored_data.csv')
+ primacy = os.path.join(scored_dir, 'mturk_batch' + batch + '_scored_data_primacy.csv')
+ recency = os.path.join(scored_dir, 'mturk_batch' + batch + '_scored_data_recency.csv')
+ composite = os.path.join(scored_dir, 'mturk_batch' + batch + '_composite_scores.csv')
+ demo_df = pd.read_csv(demo, dtype=str)
+ clin_raw_df = pd.read_csv(clin_raw, dtype=str)
+ sum_df = pd.read_csv(sum, dtype=str).rename(index=str, columns={'script.subjectid': 'subject'})
+ scored_df = pd.read_csv(scored)
+ primacy_df = pd.read_csv(primacy, dtype=str)
+ recency_df = pd.read_csv(recency, dtype=str)
+ # Primacy and recency scores side by side, with overlapping columns suffixed.
+ # NOTE(review): on= is combined with left_index=True here; check that the installed
+ # pandas version accepts that combination.
+ extra_measures = primacy_df.merge(recency_df, on='subj_id', left_index=True, how='left',
+ suffixes=('_primacy', '_recency')).rename(columns={'subj_id': 'subject'})
+ # The composite file's unnamed first column is renamed to 'subject' and cast to int.
+ comp_df = pd.read_csv(composite).rename(index=str, columns={'Unnamed: 0': 'subject'})
+ comp_df['subject'] = comp_df['subject'].apply(int)
+ # Join the scored rows to the composite rows: scored_df's row index against comp_df['subject'].
+ vmreact_df = pd.merge(scored_df, comp_df, left_index=True, right_on='subject', how='left').drop('subject', axis=1)
+ vmreact_df['subj_id'] = vmreact_df['subj_id'].astype(str)
+ # vmreact_df['subj_id']=vmreact_df['subj_id'].apply(pd.to_numeric)
+ # The latency summary is re-read for every batch because it is filtered in place below.
+ latency_df = pd.read_csv(latency_csv, dtype=str)
+ latency_df = latency_df.drop_duplicates().reset_index()
+ subject_ids = vmreact_df['subj_id'].tolist()
+ vmreact_df = vmreact_df.merge(extra_measures, left_on='subj_id', right_on='subject').drop('subject', axis=1)
+ # For demo, clinical and summary frames: keep rows for this batch's subjects, keep the
+ # wanted columns that exist, and add the wanted columns that are missing as NaN.
+ batch_demo_cols = [x for x in demo_df.columns.tolist() if x in demo_cols]
+ append_demo_cols = [x for x in demo_cols if x not in demo_df.columns.tolist()]
+ demo_df = demo_df[demo_df['subject'].astype(str).isin(subject_ids)][batch_demo_cols]
+ for col in append_demo_cols:
+ demo_df[col] = np.nan
+ # print demo_df
+ # demo_df['subject']=demo_df['subject'].apply(pd.to_numeric)
+ batch_clin_cols = [x for x in clin_raw_df.columns.tolist() if x in clin_raw_cols]
+ append_clin_cols = [x for x in clin_raw_cols if x not in clin_raw_df.columns.tolist()]
+ clin_raw_df = clin_raw_df[clin_raw_df['subject'].astype(str).isin(subject_ids)][batch_clin_cols]
+ for col in sorted(append_clin_cols):
+ clin_raw_df[col] = np.nan
+ # clin_raw_df['subject']=clin_raw_df['subject'].apply(pd.to_numeric)
+ batch_sum_cols = [x for x in sum_df.columns.tolist() if x in sum_cols]
+ append_sum_cols = [x for x in sum_cols if x not in sum_df.columns.tolist()]
+ sum_df = sum_df[sum_df['subject'].astype(str).isin(subject_ids)][batch_sum_cols]
+ for col in sorted(append_sum_cols):
+ sum_df[col] = np.nan
+ # sum_df['subject']=sum_df['subject'].apply(pd.to_numeric)
+ # Chain the merges on 'subject'. The *_y duplicates from the clinical frame are dropped and
+ # the *_x columns are renamed back to their plain names below.
+ batch_df = demo_df.merge(sum_df, left_on='subject', right_on='subject').drop(
+ ['script.startdate', 'script.starttime'], axis=1)
+ batch_df = batch_df.merge(clin_raw_df, left_on='subject', right_on='subject').drop(
+ ['date_y', 'time_y', 'group_y', 'build_y'], axis=1)
+ batch_df = batch_df.merge(vmreact_df, left_on='subject', right_on='subj_id').drop('subj_id', axis=1)
+ batch_df = batch_df.rename(columns={'date_x': 'date', 'time_x': 'time', 'group_x': 'group', 'build_x': 'build'})
+ # print batch_df
+ print subject_ids
+ latency_df['subjid'] = latency_df['subjid'].astype(str)
+ latency_df['date'] = latency_df['date'].astype(int)
+ batch_df['date'] = batch_df['date'].astype(int)
+ # NOTE(review): this subject-only filter is a subset of the subject-and-date filter that
+ # follows, so it looks redundant.
+ latency_df = latency_df.loc[(latency_df['subjid'].isin(
+ batch_df['subject'].astype(str).tolist()))] # & latency_df['date'].isin(batch_df['date'].tolist()))]
+ latency_df = latency_df.loc[(
+ latency_df['subjid'].isin(batch_df['subject'].astype(str).tolist()) & latency_df['date'].isin(
+ batch_df['date'].tolist()))]
+ batch_df['subject'] = batch_df['subject'].astype(str)
+ batch_df = batch_df.merge(latency_df, left_on='subject', right_on='subjid')
+ batch_df.to_csv(os.path.join(scored_dir, 'mturk_batch' + batch + '_compiled.csv'))
+# os.system('open /Users/cdla/Desktop/scratch/vmreact/2_vmreact/'+'mturk_batch'+batch+'_compiled.csv')
+# In[97]:
+dataframes_to_concat = []
+result = []
+for compiled_csv in glob(os.path.join(scored_dir, '*compiled.csv')):
+ df = pd.read_csv(compiled_csv, dtype=str)
+ dataframes_to_concat.append(df)
+result = pd.concat(dataframes_to_concat).reindex_axis(df.columns.tolist(), axis=1).drop(
+ ['index', 'date_y', 'subjid', 'Unnamed: 0'], axis=1).dropna(how='all', axis=1).drop_duplicates()
+# print result.subject
+result = result[~result.subject.isin(['XXX', 'AVD6HMIO1HLFI', 'A5EU1AQJNC7F2'])]
+result.drop_duplicates(['date_x', 'subject'], inplace=True)
+result = result.drop_duplicates()
+result.to_csv(os.path.join(scored_dir, 'mturk_vmreact_complete_compilation.csv'), index=False)
+# In[76]:
+# In[ ]:
diff --git a/venv/lib/python2.7/site-packages/IPython/lib/tests/__init__.py b/build/lib/vmreact-merges/__init__.py
similarity index 100%
rename from venv/lib/python2.7/site-packages/IPython/lib/tests/__init__.py
rename to build/lib/vmreact-merges/__init__.py
diff --git a/build/lib/vmreact-merges/edits.py b/build/lib/vmreact-merges/edits.py
new file mode 100755
index 0000000..c9e27ba
--- /dev/null
+++ b/build/lib/vmreact-merges/edits.py
@@ -0,0 +1,448 @@
+# coding: utf-8
+# In[3]:
+import collections
+import csv
+import datetime
+from difflib import SequenceMatcher
+from glob import glob
+from math import ceil
+from shutil import copy, move
+import pandas as pd
+from IPython.display import display
+# #Grading Script
+# In[2]:
+def grader(all_subj_data_csv, data_output_raw_csv, data_output_scored_csv, word_corr, p_r):
+ with open(all_subj_data_csv, 'U') as file:
+ input_csv_lines_all_subj = csv.reader(file)
+ input_csv_lines_all_subj = map(list, zip(*input_csv_lines_all_subj))
+ all_subj_csv_lines = dict((rows[0], rows[1:]) for rows in input_csv_lines_all_subj)
+ subj_listtype = []
+ for idx, row in enumerate(all_subj_csv_lines['subject']):
+ if 'rey_list' in all_subj_csv_lines['trialcode'][idx]:
+ subj_listtype.append([all_subj_csv_lines['subject'][idx], all_subj_csv_lines['trialcode'][idx]])
+ set_subj_listtype = []
+ for subj in subj_listtype:
+ if subj not in set_subj_listtype:
+ set_subj_listtype.append(subj)
+ ## count per list type
+ index_number_resp = dict()
+ for list_type in sorted([x for x in set(all_subj_csv_lines['trialcode']) if 'rey_list' in x]):
+ index_number_resp[list_type] = []
+ for idx, response in enumerate(all_subj_csv_lines['response']):
+ if 'recall_response' in all_subj_csv_lines['trialcode'][idx]:
+ if 'listb' not in all_subj_csv_lines['trialcode'][idx]:
+ index_number_resp[
+ set_subj_listtype[[x[0] for x in set_subj_listtype].index(all_subj_csv_lines['subject'][idx])][
+ 1]].append(response.lower().strip())
+ elif 'listb' in all_subj_csv_lines['trialcode'][idx]:
+ index_number_resp[
+ set_subj_listtype[[x[0] for x in set_subj_listtype].index(all_subj_csv_lines['subject'][idx])][1][
+ :-1] + 'b'].append(response.lower().strip())
+ counter_dict = dict()
+ for list_type in sorted(index_number_resp.keys()):
+ rey_recall_word_count = collections.Counter(index_number_resp[list_type])
+ counter_dict[list_type] = rey_recall_word_count
+ total_response_for_list = dict()
+ for list_type in sorted(index_number_resp.keys()):
+ total_response_for_list[list_type] = sorted(set(index_number_resp[list_type]))
+ if p_r == 0:
+ rey_word_lists = {
+ 'rey_list_presentation_1a': ['drum', 'curtain', 'bell', 'coffee', 'school', 'parent', 'moon', 'garden',
+ 'hat', 'farmer', 'nose', 'turkey', 'color', 'house', 'river'],
+ 'rey_list_presentation_2a': ['pipe', 'wall', 'alarm', 'sugar', 'student', 'mother', 'star', 'painting',
+ 'bag', 'wheat', 'mouth', 'chicken', 'sound', 'door', 'stream'],
+ 'rey_list_presentation_3a': ['violin', 'tree', 'scarf', 'ham', 'suitcase', 'cousin', 'earth', 'stairs',
+ 'dog', 'banana', 'town', 'radio', 'hunter', 'bucket', 'field'],
+ 'rey_list_presentation_4a': ['doll', 'mirror', 'nail', 'sailor', 'heart', 'desert', 'face', 'letter', 'bed',
+ 'machine', 'milk', 'helmet', 'music', 'horse', 'road'],
+ 'rey_list_presentation_1b': ['desk', 'ranger', 'bird', 'shoe', 'stove', 'mountain', 'glasses', 'towel',
+ 'cloud', 'boar', 'lamb', 'gun', 'pencil', 'church', 'fish'],
+ 'rey_list_presentation_2b': ['bench', 'officer', 'cage', 'sock', 'fridge', 'cliff', 'bottle', 'soap',
+ 'sky', 'ship', 'goat', 'bullet', 'paper', 'chapel', 'crab'],
+ 'rey_list_presentation_3b': ['orange', 'table', 'toad', 'corn', 'bus', 'chin', 'bleach', 'soap', 'hotel',
+ 'donkey', 'spider', 'money', 'book', 'soldier', 'padlock'],
+ 'rey_list_presentation_4b': ['dish', 'jester', 'hill', 'coat', 'tool', 'forest', 'perfume', 'ladder',
+ 'girl', 'foot', 'shield', 'pie', 'insect', 'ball', 'car']
+ }
+ elif p_r == 1:
+ rey_word_lists = {'rey_list_presentation_1a': ['drum', 'curtain', 'bell', 'coffee', 'school'],
+ 'rey_list_presentation_2a': ['pipe', 'wall', 'alarm', 'sugar', 'student'],
+ 'rey_list_presentation_3a': ['violin', 'tree', 'scarf', 'ham', 'suitcase'],
+ 'rey_list_presentation_4a': ['doll', 'mirror', 'nail', 'sailor', 'heart'],
+ 'rey_list_presentation_1b': ['desk', 'ranger', 'bird', 'shoe', 'stove'],
+ 'rey_list_presentation_2b': ['bench', 'officer', 'cage', 'sock', 'fridge'],
+ 'rey_list_presentation_3b': ['orange', 'table', 'toad', 'corn', 'bus'],
+ 'rey_list_presentation_4b': ['dish', 'jester', 'hill', 'coat', 'tool']
+ }
+ elif p_r == 2:
+ rey_word_lists = {'rey_list_presentation_1a': ['nose', 'turkey', 'color', 'house', 'river'],
+ 'rey_list_presentation_2a': ['mouth', 'chicken', 'sound', 'door', 'stream'],
+ 'rey_list_presentation_3a': ['town', 'radio', 'hunter', 'bucket', 'field'],
+ 'rey_list_presentation_4a': ['milk', 'helmet', 'music', 'horse', 'road'],
+ 'rey_list_presentation_1b': ['lamb', 'gun', 'pencil', 'church', 'fish'],
+ 'rey_list_presentation_2b': ['goat', 'bullet', 'paper', 'chapel', 'crab'],
+ 'rey_list_presentation_3b': ['spider', 'money', 'book', 'soldier', 'padlock'],
+ 'rey_list_presentation_4b': ['shield', 'pie', 'insect', 'ball', 'car']
+ }
+ with open(word_corr, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ for word_list in sorted(total_response_for_list.keys()):
+ word_corrs = []
+ for word in total_response_for_list[word_list]:
+ wordcorrs = [round(SequenceMatcher(None, word, x).ratio(), 3) for x in rey_word_lists[word_list]]
+ word_corrs.append(wordcorrs)
+ writer.writerow([word, max(wordcorrs), rey_word_lists[word_list][wordcorrs.index(max(wordcorrs))]])
+ csvfile.close()
+ subj_id_list = []
+ subj_only = []
+ for subj in sorted(set(all_subj_csv_lines['subject'])):
+ try:
+ subj_list_type = [all_subj_csv_lines['trialcode'][x] for x in range(len(all_subj_csv_lines['subject']))
+ if (all_subj_csv_lines['subject'][x] == subj) and (
+ 'rey_list_presentation_' in all_subj_csv_lines['trialcode'][x])][0]
+ subj_id_list.append([all_subj_csv_lines['date'][x] + '_' + subj, subj_list_type])
+ subj_only.append(subj)
+ except:
+ print "%s has an error in their data" % subj
+ continue
+ full_raw_data_responses = [
+ [all_subj_csv_lines['date'][x] + '_' + all_subj_csv_lines['subject'][x], all_subj_csv_lines['trialcode'][x],
+ all_subj_csv_lines['response'][x].lower()]
+ for x in range(len(all_subj_csv_lines['subject']))
+ if 'recall_response' in all_subj_csv_lines['trialcode'][x]]
+ all_responses = []
+ repeats = []
+ list_b_all = []
+ list_a_all = []
+ with open(data_output_raw_csv, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ writer.writerow(('subj_id', 'list_type', 'trial', 'response', 'score'))
+ for response in full_raw_data_responses:
+ subj = response[0]
+ list_to_use = [subj_id_list[x][1] for x in range(len(subj_id_list)) if subj_id_list[x][0] == subj][0]
+ list_a_all.append(list_to_use)
+ list_b = list_to_use[:-1] + 'b'
+ list_b_all.append(list_b)
+ if 'listb' in response[1]:
+ if response[2] in rey_word_lists[list_b]:
+ response.append(1)
+ else:
+ if any(n > 0.8 for n in
+ [SequenceMatcher(None, response[2], x).ratio() for x in rey_word_lists[list_b]]):
+ response.append(1)
+ else:
+ response.append(0)
+ new_row = response[0], list_b, response[1].split('_')[0], response[2], response[3]
+ else:
+ if response[2] in rey_word_lists[list_to_use]:
+ response.append(1)
+ else:
+ if any(n > 0.8 for n in
+ [SequenceMatcher(None, response[2], x).ratio() for x in rey_word_lists[list_to_use]]):
+ response.append(1)
+ else:
+ response.append(0)
+ new_row = response[0], list_to_use, response[1].split('_')[0], response[2], response[3]
+ writer.writerow(new_row)
+ all_responses.append(response)
+ rep = new_row
+ repeats.append(rep)
+ csvfile.close()
+ trial_breaks = []
+ trial_lines = [all_responses[y][1] for y in range(0, len(all_responses))]
+ trial_breaks = [i for i, x in enumerate(trial_lines[0:])
+ if x.split('_')[0] != trial_lines[i - 1].split('_')[0]]
+ trial_breaks = trial_breaks + [len(all_responses)]
+ subj_scores = []
+ final = []
+ final_repeats = []
+ for idx, val in enumerate(trial_breaks[:-1]):
+ score = 0
+ word_list = []
+ for line in all_responses[trial_breaks[idx]:trial_breaks[idx + 1]]:
+ if line[3] == 1:
+ score = score + 1
+ word_list.append(line[2])
+ test = []
+ for idx, word in enumerate(word_list):
+ test.append([SequenceMatcher(None, word, x).ratio() for x in
+ [y for idx2, y in enumerate(word_list) if idx != idx2]])
+ repeats = 0
+ for word in test:
+ word_thresholded = [ceil(x) for x in word if x > 0.8]
+ n = sum(word_thresholded)
+ if n != 0:
+ repeats = repeats + (((n * (n + 1)) - 1) / (n + 1))
+ subj_scores.append([line[0], line[1].split('_')[0], score, repeats])
+ with open(data_output_scored_csv, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ writer.writerow(
+ ['subj_id', 'list_type', 'listb', 'trial1', 'trial2', 'trial3', 'trial4', 'trial5', 'trial6', 'trial7',
+ 'listb_#_repeats', 'trial1_#_repeats', 'trial2_#_repeats', 'trial3_#_repeats', 'trial4_#_repeats',
+ 'trial5_#_repeats', 'trial6_#_repeats', 'trial7_#_repeats'])
+ subj_scores = subj_scores + ['placeholder']
+ for idx, scores in enumerate(sorted(subj_scores[:-1])):
+ scored = str(scores[2] - scores[3])
+ repeat_nm = scores[3]
+ final.append(scored)
+ final_repeats.append(repeat_nm)
+ subj_id = [scores[0]]
+ for idx2, val in enumerate(subj_id_list):
+ if subj_id[0] == subj_id_list[idx2][0]:
+ subj_list = subj_id_list[idx2][1].split('_')[3]
+ final_row = subj_id + [subj_list] + final + final_repeats
+ if scores[0] != sorted(subj_scores)[idx + 1][0]:
+ writer.writerow(final_row)
+ final_row = []
+ subj_id = []
+ final = []
+ final_repeats = []
+ csvfile.close()
+# In[63]:
+# Getting composite scores from scored
+# In[ ]:
+import numpy as np
+import pandas
+import os
+def composite_scores(input_csv, output_csv):
+    """Compute composite measures from a scored-trials CSV and save them.
+
+    Reads ``input_csv``, which must contain a ``listb`` column and the
+    contiguous columns ``trial1`` .. ``trial7``, derives one row of composite
+    measures per input row, and writes them to ``output_csv`` with the row
+    index as the first, unlabeled column.
+
+    Args:
+        input_csv: path of the scored-data CSV to read.
+        output_csv: path of the composite-score CSV to write (overwritten).
+    """
+    scored_data = pandas.read_csv(input_csv)
+    print(input_csv)
+    df_trials = scored_data.loc[:, 'trial1':'trial7']
+    print(df_trials.columns.tolist())
+    composites = pandas.DataFrame()
+    learning_trials = ['trial1', 'trial2', 'trial3', 'trial4', 'trial5']
+    composites['total_learning'] = df_trials[learning_trials].sum(axis=1)
+    # Scale the trial1 Series by 5. Multiplying a Python list by 5 repeats the
+    # list instead, which after index alignment subtracts trial1 only once.
+    composites['corrected_total_learning'] = composites['total_learning'] - 5 * df_trials['trial1']
+    composites['learning_rate'] = df_trials['trial5'].subtract(df_trials['trial1'], axis='rows')
+    composites['proactive_interference'] = df_trials['trial1'].subtract(scored_data['listb'], axis='rows')
+    composites['retroactive_interference'] = df_trials['trial5'].subtract(df_trials['trial6'], axis='rows')
+    composites['forgetting_and_retention'] = df_trials['trial5'].subtract(df_trials['trial7'], axis='rows')
+    # Only the one-row-per-subject layout is written: a transposed copy saved
+    # to the same path would be overwritten by this call anyway.
+    # index=True keeps the unlabeled index column that readers see as 'Unnamed: 0'.
+    composites.to_csv(output_csv, header=True, index=True)
+for scored in glob('/Users/cdla/Desktop/scratch/vmreact/2_vmreact/*_scored_data.csv'):  # every '*_scored_data.csv' in the hard-coded results directory
+ composite_scores(scored, scored.replace('_scored_data.csv', '_composite_scores.csv'))  # output path is the input path with the suffix swapped
+# In[ ]: run grader three times on every raw export and copy its demo/summary files next to the results
+format = "%Y_%m_%d"  # NOTE(review): shadows the builtin `format`
+current_date = datetime.datetime.today()
+date = current_date.strftime(format)  # not referenced again in this cell
+output_csv_location = '/Users/cdla/Desktop/scratch/vmreact/2_vmreact/'
+for raw in glob('/Users/cdla/Desktop/scratch/vmreact/1_rawdata/*/*raw.csv'):
+ raw_data = raw
+ demo_data = raw.replace('raw.csv', 'demo.csv')  # sibling files are located by swapping the 'raw.csv' suffix
+ summary_data = raw.replace('raw.csv', 'summary.csv')
+ prefix = 'mturk_' + os.path.basename(os.path.dirname(raw_data)).split('_')[1] + '_'  # second '_'-separated token of the parent directory name
+ grader(raw_data, os.path.join(output_csv_location, prefix + 'parsed_raw_data.csv'),  # last argument p_r=0; presumably the unrestricted scoring mode -- confirm against grader
+ os.path.join(output_csv_location, prefix + 'scored_data.csv'),
+ os.path.join(output_csv_location, prefix + 'word_correlations.csv'), 0)
+ grader(raw_data, os.path.join(output_csv_location, prefix + 'parsed_raw_data_primacy.csv'),  # p_r=1; presumably primacy scoring, judging by the output names -- confirm
+ os.path.join(output_csv_location, prefix + 'scored_data_primacy.csv'),
+ os.path.join(output_csv_location, prefix + 'word_correlations_primacy.csv'), 1)
+ grader(raw_data, os.path.join(output_csv_location, prefix + 'parsed_raw_data_recency.csv'),  # p_r=2; presumably recency scoring, judging by the output names -- confirm
+ os.path.join(output_csv_location, prefix + 'scored_data_recency.csv'),
+ os.path.join(output_csv_location, prefix + 'word_correlations_recency.csv'), 2)
+ copy(demo_data, os.path.join(output_csv_location, prefix + 'demo.csv'))  # demo and summary files are copied unchanged under the batch prefix
+ copy(summary_data, os.path.join(output_csv_location, prefix + 'summary.csv'))
+# In[4]: cut every row of each '*parsed*' CSV down to its first 18 fields, replacing the file
+scored_dir = '/Users/cdla/Desktop/scratch/vmreact/2_vmreact/'
+for scored_csv in glob(os.path.join(scored_dir, '*parsed*')):  # note: the pattern matches the parsed files despite the variable name
+ with open(scored_csv, 'rb') as source:
+ rdr = csv.reader(source)
+ with open(os.path.join(scored_dir, 'tmp.csv'), 'wb') as result:  # trimmed copy goes to a scratch file first
+ wtr = csv.writer(result)
+ for r in rdr:
+ wtr.writerow(r[0:18])  # keep fields 0-17 only
+ move(os.path.join(scored_dir, 'tmp.csv'), scored_csv)  # the scratch file then replaces the original
+ print scored_csv
+# In[174]: build the union of demographic and clinical column names over batches 1-8
+scored_dir = '/Users/cdla/Desktop/scratch/vmreact/2_vmreact/'
+demo_cols = []
+clin_raw_cols = []
+sum_cols = ['script.startdate', 'script.starttime', 'subject',  # fixed column lists for the summary, scored and composite files
+ 'expressions.gad_7_total', 'expressions.phq_total', 'expressions.pcl_4_total',
+ 'expressions.pcl_total_hybridscore_corrected', 'expressions.pcl_total_hybridscore_uncorrected']
+scored_cols = ['subj_id', 'list_type', 'listb', 'trial1', 'trial2', 'trial3',
+ 'trial4', 'trial5', 'trial6', 'trial7', 'listb_#_repeats', 'trial1_#_repeats', 'trial2_#_repeats',
+ 'trial3_#_repeats', 'trial4_#_repeats', 'trial5_#_repeats', 'trial6_#_repeats', 'trial7_#_repeats']
+composite_cols = ['subject', 'total_learning', 'corrected_total_learning', 'learning_rate',
+ 'proactive_interference', 'retroactive_interference', 'forgetting_and_retention']
+for batch in range(1, 9):
+ batch = str(batch)
+ demo = os.path.join(scored_dir, 'mturk_batch' + batch + '_demo.csv')
+ clin_raw = os.path.join(scored_dir, 'mturk_batch' + batch + '_end.csv')
+ sum = os.path.join(scored_dir, 'mturk_batch' + batch + '_summary.csv')  # NOTE(review): shadows the builtin `sum`
+ scored = os.path.join(scored_dir, 'mturk_batch' + batch + '_scored_data.csv')
+ composite = os.path.join(scored_dir, 'mturk_batch' + batch + '_composite_scores.csv')
+ demo_df = pd.read_csv(demo, dtype=str)
+ demo_cols.extend([x for x in demo_df.columns.tolist() if (  # skip columns whose name contains any of the excluded substrings
+ 'latency' not in x and 'online' not in x and 'Unnamed' not in x and 'time_comp' not in x and 'subj_id' not in x)])
+ print batch
+ clin_raw_df = pd.read_csv(clin_raw, dtype=str)
+ clin_raw_cols.extend(
+ [x for x in clin_raw_df.columns.tolist() if 'latency' not in x and 'end' not in x and 'Unnamed' not in x])
+ sum_df = pd.read_csv(sum, dtype=str)  # sum_df, scored_df and comp_df are read but not used again in this cell
+ scored_df = pd.read_csv(scored, dtype=str)
+ comp_df = pd.read_csv(composite, dtype=str).rename(index=str, columns={'Unnamed: 0': 'subject'})
+demo_cols = list(set(demo_cols))  # de-duplicate; set() does not keep the original order
+clin_raw_cols = list(set(clin_raw_cols))
+print demo_cols
+print clin_raw_cols
+# need to get latency values,
+# use the scored to set the subject ids.
+# append composite to scored_cols since they're in the same order and composite doesn't have subject ids
+# summary - use script.subjectid
+# demo - use subject
+# clin_raw - use subject
+# In[281]: merge demo, summary, clinical, scored/composite and latency data for a batch into '<batch>_compiled.csv'
+scored_dir = '/Users/cdla/Desktop/scratch/vmreact/2_vmreact/'
+latency_csv = os.path.join(scored_dir, 'vmreact_latency_summary.csv')
+# for batch in range(1,9):
+for batch in [8]:  # only batch 8 is processed; the full range is commented out above
+ batch_df = pd.DataFrame()  # placeholder; rebound by the first merge below
+ batch = str(batch)
+ print 'mturk_batch' + batch
+ demo = os.path.join(scored_dir, 'mturk_batch' + batch + '_demo.csv')
+ clin_raw = os.path.join(scored_dir, 'mturk_batch' + batch + '_end.csv')
+ sum = os.path.join(scored_dir, 'mturk_batch' + batch + '_summary.csv')  # NOTE(review): shadows the builtin `sum`
+ scored = os.path.join(scored_dir, 'mturk_batch' + batch + '_scored_data.csv')
+ composite = os.path.join(scored_dir, 'mturk_batch' + batch + '_composite_scores.csv')
+ demo_df = pd.read_csv(demo, dtype=str)
+ clin_raw_df = pd.read_csv(clin_raw, dtype=str)
+ sum_df = pd.read_csv(sum, dtype=str).rename(index=str, columns={'script.subjectid': 'subject'})  # align the id column name with the other frames
+ scored_df = pd.read_csv(scored)
+ comp_df = pd.read_csv(composite).rename(index=str, columns={'Unnamed: 0': 'subject'})  # 'Unnamed: 0' is the unlabeled first column; presumably the saved row index -- confirm
+ comp_df['subject'] = comp_df['subject'].apply(int)
+ vmreact_df = pd.merge(scored_df, comp_df, left_index=True, right_on='subject', how='left').drop('subject', axis=1)  # pairs scored rows with composite rows by row position
+ vmreact_df['subj_id'] = vmreact_df['subj_id'].astype(str)
+ # vmreact_df['subj_id']=vmreact_df['subj_id'].apply(pd.to_numeric)
+ latency_df = pd.read_csv(latency_csv, dtype=str)
+ latency_df = latency_df.drop_duplicates().reset_index()  # reset_index() without drop=True adds an 'index' column
+ subject_ids = vmreact_df['subj_id'].tolist()
+ batch_demo_cols = [x for x in demo_df.columns.tolist() if x in demo_cols]  # demo_cols comes from the column-collection cell
+ append_demo_cols = [x for x in demo_cols if x not in demo_df.columns.tolist()]
+ demo_df = demo_df[demo_df['subject'].astype(str).isin(subject_ids)][batch_demo_cols]  # keep only subjects present in the scored data
+ for col in append_demo_cols:
+ demo_df[col] = np.nan  # columns this batch lacks are added as NaN so all batches share one layout
+ # print demo_df
+ # demo_df['subject']=demo_df['subject'].apply(pd.to_numeric)
+ batch_clin_cols = [x for x in clin_raw_df.columns.tolist() if x in clin_raw_cols]  # same select-then-pad treatment for the clinical file
+ append_clin_cols = [x for x in clin_raw_cols if x not in clin_raw_df.columns.tolist()]
+ clin_raw_df = clin_raw_df[clin_raw_df['subject'].astype(str).isin(subject_ids)][batch_clin_cols]
+ for col in sorted(append_clin_cols):
+ clin_raw_df[col] = np.nan
+ # clin_raw_df['subject']=clin_raw_df['subject'].apply(pd.to_numeric)
+ batch_sum_cols = [x for x in sum_df.columns.tolist() if x in sum_cols]  # and for the summary file
+ append_sum_cols = [x for x in sum_cols if x not in sum_df.columns.tolist()]
+ sum_df = sum_df[sum_df['subject'].astype(str).isin(subject_ids)][batch_sum_cols]
+ for col in sorted(append_sum_cols):
+ sum_df[col] = np.nan
+ # sum_df['subject']=sum_df['subject'].apply(pd.to_numeric)
+ batch_df = demo_df.merge(sum_df, left_on='subject', right_on='subject').drop(  # merge() defaults to an inner join on 'subject'
+ ['script.startdate', 'script.starttime'], axis=1)
+ batch_df = batch_df.merge(clin_raw_df, left_on='subject', right_on='subject').drop(  # the '_y' duplicates from the clinical file are dropped
+ ['date_y', 'time_y', 'group_y', 'build_y'], axis=1)
+ batch_df = batch_df.merge(vmreact_df, left_on='subject', right_on='subj_id').drop('subj_id', axis=1)
+ batch_df = batch_df.rename(columns={'date_x': 'date', 'time_x': 'time', 'group_x': 'group', 'build_x': 'build'})  # restore the un-suffixed names
+ # print batch_df
+ print subject_ids
+ latency_df['subjid'] = latency_df['subjid'].astype(str)
+ latency_df['date'] = latency_df['date'].astype(int)  # both date columns are cast to int so the isin() test below compares like with like
+ batch_df['date'] = batch_df['date'].astype(int)
+ latency_df = latency_df.loc[(latency_df['subjid'].isin(  # subject-only filter; the next statement repeats it with a date condition added
+ batch_df['subject'].astype(str).tolist()))] # & latency_df['date'].isin(batch_df['date'].tolist()))]
+ latency_df = latency_df.loc[(
+ latency_df['subjid'].isin(batch_df['subject'].astype(str).tolist()) & latency_df['date'].isin(
+ batch_df['date'].tolist()))]
+ batch_df['subject'] = batch_df['subject'].astype(str)
+ batch_df = batch_df.merge(latency_df, left_on='subject', right_on='subjid')  # both frames have a 'date' column, hence the 'date_x'/'date_y' names used by the compilation cell
+ batch_df.to_csv(os.path.join(scored_dir, 'mturk_batch' + batch + '_compiled.csv'))
+ os.system('open /Users/cdla/Desktop/scratch/vmreact/2_vmreact/' + 'mturk_batch' + batch + '_compiled.csv')  # shells out to `open` on a hard-coded copy of the scored_dir path
+# In[318]:
+# Combine every per-batch '*compiled.csv' under scored_dir into one compilation CSV.
+dataframes_to_concat = []
+for compiled_csv in glob(os.path.join(scored_dir, '*compiled.csv')):
+    df = pd.read_csv(compiled_csv, dtype=str)
+    dataframes_to_concat.append(df)
+# Column order follows the last file read. reindex(columns=...) is the supported
+# spelling of the deprecated reindex_axis(..., axis=1) and yields the same frame.
+result = pd.concat(dataframes_to_concat).reindex(columns=df.columns.tolist()).drop(
+    ['index', 'date_y', 'subjid', 'Unnamed: 0'], axis=1).dropna(how='all', axis=1).drop_duplicates()
+# print result.subject
+result = result[~result.subject.isin(['XXX', 'AVD6HMIO1HLFI', 'A5EU1AQJNC7F2'])]
+# drop_duplicates returns a new frame, so the name must be re-bound for the
+# one-row-per-(date_x, subject) filter to reach the written file.
+result = result.drop_duplicates(['date_x', 'subject'])
+result.to_csv(os.path.join(scored_dir, 'mturk_vmreact_complete_compilation.csv'), index=False)
+# In[ ]:
diff --git a/build/lib/vmreact-merges/morevmreact.py b/build/lib/vmreact-merges/morevmreact.py
new file mode 100755
index 0000000..fc1d5ee
--- /dev/null
+++ b/build/lib/vmreact-merges/morevmreact.py
@@ -0,0 +1,122 @@
+# coding: utf-8
+# In[40]:
+import os
+from glob import glob
+import pandas as pd
+# In[41]: read the parsed raw-response CSV of batches 1-8 and tag each row with a per-batch identifier
+scored_dir = '/Users/lillyel-said/Desktop/vmreact/vmreact/2_vmreact/'
+parsed_list = []
+for batch in range(1, 9):
+ batch = str(batch)
+ parsed = os.path.join(scored_dir, 'mturk_batch' + batch + '_parsed_raw_data.csv')
+ parsed_df = pd.read_csv(parsed, dtype=str)
+ parsed_list.append(parsed_df)  # the list holds this same frame object, so the column added on the next line is included
+ parsed_df['identifier'] = parsed_df['subj_id'] + '_' + parsed.split('_')[-4]  # 4th-from-last '_' token of the path, i.e. 'batch<N>'
+# In[42]: stack all batches and keep a fixed subset of columns
+all_parsed = pd.concat(parsed_list, axis=0)
+all_parsed_df = pd.DataFrame(data=all_parsed)  # pd.concat already returned a DataFrame
+cols = ['subj_id', 'identifier', 'list_type', 'trial', 'response', 'score']
+final_csv = pd.DataFrame(data=all_parsed_df, columns=cols)  # selects and orders the listed columns
+# final_csv.to_csv(os.path.join(scored_dir,'parsed_raw_with_errors.csv'))
+# In[43]: keep the rows whose score is the string '0' (the CSVs were read with dtype=str)
+zero = final_csv.loc[final_csv['score'] == '0']
+incorrect_df = pd.DataFrame(data=zero)
+# incorrect_df.to_csv(os.path.join(scored_dir,'incorrect_response.csv'))
+# In[56]:
+# Eight 15-word lists keyed 'rey_list_presentation_<1-4><a|b>'.
+rey_word_lists = {
+    'rey_list_presentation_1a': ['drum', 'curtain', 'bell', 'coffee', 'school', 'parent', 'moon', 'garden',
+                                 'hat', 'farmer', 'nose', 'turkey', 'color', 'house', 'river'],
+    'rey_list_presentation_2a': ['pipe', 'wall', 'alarm', 'sugar', 'student', 'mother', 'star', 'painting',
+                                 'bag', 'wheat', 'mouth', 'chicken', 'sound', 'door', 'stream'],
+    'rey_list_presentation_3a': ['violin', 'tree', 'scarf', 'ham', 'suitcase', 'cousin', 'earth', 'stairs',
+                                 'dog', 'banana', 'town', 'radio', 'hunter', 'bucket', 'field'],
+    'rey_list_presentation_4a': ['doll', 'mirror', 'nail', 'sailor', 'heart', 'desert', 'face', 'letter', 'bed',
+                                 'machine', 'milk', 'helmet', 'music', 'horse', 'road'],
+    'rey_list_presentation_1b': ['desk', 'ranger', 'bird', 'shoe', 'stove', 'mountain', 'glasses', 'towel',
+                                 'cloud', 'boar', 'lamb', 'gun', 'pencil', 'church', 'fish'],
+    'rey_list_presentation_2b': ['bench', 'officer', 'cage', 'sock', 'fridge', 'cliff', 'bottle', 'soap',
+                                 'sky', 'ship', 'goat', 'bullet', 'paper', 'chapel', 'crab'],
+    'rey_list_presentation_3b': ['orange', 'table', 'toad', 'corn', 'bus', 'chin', 'bleach', 'soap', 'hotel',
+                                 'donkey', 'spider', 'money', 'book', 'soldier', 'padlock'],
+    'rey_list_presentation_4b': ['dish', 'jester', 'hill', 'coat', 'tool', 'forest', 'perfume', 'ladder',
+                                 'girl', 'foot', 'shield', 'pie', 'insect', 'ball', 'car'],
+}
+# One column per list; every list has 15 entries, so the columns line up row for row.
+word_lists_df = pd.DataFrame.from_dict(rey_word_lists)
+# In[ ]: print the incorrect responses group by group
+for lists, response in incorrect_df.groupby(level=1):  # NOTE(review): incorrect_df appears to carry a plain single-level index, for which level=1 is invalid -- confirm; grouping by a column such as 'list_type' may have been intended
+ print response
+# In[55]:
+# In[45]: collect the 'typing_test' block rows from every raw MTurk export
+cols = ['typing_test_openended_sentence1', 'typing_speed_next_trial', 'typing_test_openended_sentence2',  # not referenced again in this cell; 'typing_test_openended_sentence2' is listed twice
+ 'typing_test_error2', 'typing_test_openended_sentence2', 'typing_speed_next_trial_2',
+ 'typing_test_openended_sentence3']
+scored_dir = '/Users/lillyel-said/Desktop/vmreact/vmreact/1_rawdata/'  # the name is reused, but it now points at the raw-data directory
+typing_test_list = []
+for scored_csv in glob(os.path.join(scored_dir, '*mturk*', '*raw.csv')):
+ raw_csv = pd.read_csv(scored_csv, dtype=str)
+ typing_test = raw_csv.loc[raw_csv['blockcode'] == 'typing_test']  # rows belonging to the typing-test block
+ typing_test_list.append(typing_test)
+# typingtest=df.loc[df['trialcode'].str.contains(trial),'trialcode']
+combined = pd.concat(typing_test_list, axis=0)
+# In[46]: reduce the typing-test rows to six columns
+typing_test_cols = combined.columns.tolist()  # not referenced again in the visible remainder of this file
+final_cols = ['subject', 'date', 'blockcode', 'trialcode', 'response', 'latency']
+typing_test_only = combined[final_cols]
+unique_cols = combined['trialcode'].unique().tolist()  # distinct trial codes; also not referenced again below
+# In[47]: save them inside the raw-data directory (the row index is written too)
+typing_test_only.to_csv(os.path.join(scored_dir, 'typing_test_raw.csv'))
+# In[48]:
+# for ix, response in typing_test_only.groupby('subject'):
+# if (response.response != 57).any():
+# print response['trialcode'][0] ==
+# print response.subject.head(),response.response.unique()
+# In[38]: print each subject's typing-test rows
+sentence_1 = ['typing_test_openended_sentence1', 'typing_speed_next_trial']  # the three sentence_* lists are defined but not used by the loop below
+sentence_2 = ['typing_test_openended_sentence2', 'typing_speed_next_trial_2']
+sentence_3 = ['typing_test_openended_sentence3', 'typing_speed_next_trial_3']
+for i, sentence in typing_test_only.groupby('subject'):
+ print sentence
+# In[ ]:
+# In[ ]:
diff --git a/build/lib/vmreact-merges/numbers_figures_vmreact.py b/build/lib/vmreact-merges/numbers_figures_vmreact.py
new file mode 100755
index 0000000..da47291
--- /dev/null
+++ b/build/lib/vmreact-merges/numbers_figures_vmreact.py
@@ -0,0 +1,213 @@
+# coding: utf-8
+# In[ ]:
+import os
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+# In[ ]: load the compiled dataset and index it by (gender_response, age_range)
+data_dir = '/Users/lillyel-said/Desktop/vmreact/vmreact/2_vmreact/'
+all_data = pd.read_csv(os.path.join(data_dir, 'mturk_vmreact_complete_compilation.csv'))
+indexed_df = pd.DataFrame(data=all_data.set_index(['gender_response', 'age_range']))  # two-level index consumed by the groupby(level=[0, 1]) cells below
+# In[ ]: print the value frequencies of the three 'online_sr_q2option<N>_response' columns
+print indexed_df['online_sr_q2option1_response'].value_counts()
+print indexed_df['online_sr_q2option2_response'].value_counts()
+print indexed_df['online_sr_q2option3_response'].value_counts()
+ column=['expressions.pcl_total_hybridscore_corrected', 'expressions.phq_total', 'expressions.pcl_4_total',  # NOTE(review): the opening of this call is missing, so this fragment does not parse; presumably a `.hist(` call like the commented one below -- restore from the original file
+ 'gad_7_q2_response'])
+# indexed_df.hist(column=['online_sr_q2option1_response','online_sr_q2option2_response','online_sr_q2option3_response'])
+# In[ ]: print the education_response frequencies for each (gender_response, age_range) group
+for idx, data in indexed_df.groupby(level=[0, 1]):
+ print idx
+ print data['education_response'].T.value_counts()  # .T on a Series leaves it unchanged
+# data.hist(column=['expressions.pcl_total_hybridscore_corrected','expressions.phq_total','expressions.pcl_4_total','gad_7_q2_response'])
+# In[ ]: for each (gender_response, age_range) group with more than 3 rows, draw a 4x2 grid of trial-score histograms
+y = ['trial1', 'trial2', 'trial3', 'trial4', 'trial5', 'listb', 'trial6', 'trial7']
+colors = ['lightskyblue', 'lightskyblue', 'lightskyblue', 'lightskyblue', 'lightskyblue', '#96D38C', '#E1396C',  # one colour per entry of y, in the same order
+ '#E1396C']
+columns = [c for c in y]
+bins = range(0, 17)
+for idx, val in indexed_df.groupby(level=[0, 1]):
+ if len(val[columns]) > 3:  # row count of the group
+ trials = val.loc[:, 'listb':'trial7']  # label slice; relies on these columns being contiguous
+ fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(14, 12))
+ fig = trials.plot.hist(y=y, bins=bins, subplots=True, ax=axes, legend=True, title=idx, xticks=range(0, 16),  # `fig` is rebound to the value returned by plot.hist
+ xlim=(0, 16), fontsize=10, color=colors)
+ for (m, n), subplot in np.ndenumerate(axes):
+ ymin, ymax = subplot.get_ylim()
+ subplot.set_ylim(0, (ymax + 1))  # add one unit of headroom above the tallest bar
+ else:
+ print idx, 'n too small'
+# In[ ]:
+# all_data.rename(columns={'gender_response':'gender','age_textbox_response':'age','date_x':'date'},inplace=True)
+# In[ ]: split the latency columns into first-character and response-latency frames
+trial_latency_cols = ['subject', 'date']
+ [col for col in indexed_df.columns.tolist() if 'firstcharlatency' in col or 'response_latency' in col])  # NOTE(review): the opening of this call is missing (note the unmatched ')'); presumably `trial_latency_cols.extend(` -- restore from the original file
+first_char_df = pd.DataFrame(data=all_data,
+ columns=[c for c in trial_latency_cols if 'firstchar' in c or 'subject' in c])
+recall_df = pd.DataFrame(data=all_data, columns=[r for r in trial_latency_cols if 'response' in r or 'subject' in r])
+# In[ ]: per trial, initiation latency = response latency minus first-character latency
+trials = ['trial1', 'trial2', 'trial3', 'trial4', 'trial5', 'listb', 'trial6', 'trial7']
+new = pd.DataFrame()
+for t in trials:
+ new[t + '_initiation_latency'] = all_data[t + '_values.response_latency'].subtract(
+ all_data[t + '_values.recall_firstcharlatency'])
+# In[ ]: write the original columns plus the new ones side by side (the row index is written too)
+pd.concat([all_data, new], axis=1).to_csv(os.path.join(data_dir, 'mturk_vmreact_complete_compilation_initiation.csv'))
+# In[ ]:
+import os
+import numpy as np
+import pandas as pd
+from glob import glob
+# In[ ]: input and output locations for a single launch
+data_dir = '/Users/lillyel-said/Desktop/vmreact/final_inquisit_launches/launches/broken_up_by_each_launch/0612217_reyravlt_antr_pilot4'
+output_dir = '/Users/lillyel-said/Desktop/vmreact/final_inquisit_launches/launches/broken_up_by_each_launch/0612217_reyravlt_antr_pilot4/test'
+# In[ ]: build the header row: 'subjid', 'date', then '<trial>_<measure>' for every trial/measure pair
+trials = ['trial1', 'trial2', 'trial3', 'trial4', 'trial5', 'listb', 'trial6', 'trial7']
+cols = ['values.response_latency', 'expressions.trial_recall_word_latency',
+ 'values.recall_firstcharlatency', 'values.recall_lastcharlatency']
+column_titles = ['subjid', 'date']
+for trial in trials:
+ for meas in cols:
+ column_titles.append(trial + "_" + meas)
+final_csv = [column_titles]  # list of rows, starting with the header
+print cols
+# In[ ]: normalise trial codes in every raw file and collect the union of column names
+total_columns = []
+for data_file in glob(os.path.join(data_dir, '*raw.csv')):
+ data_df = pd.read_csv(data_file, dtype=str)
+ data_df.loc[data_df['response'] == ' ', 'trialcode'] = 'trial_confirmation'  # rows whose response is a single space are relabelled
+ for trial in ['trial1', 'trial2', 'trial3', 'trial4', 'trial5', 'trial6', 'trial7', 'trial8', 'listb']:
+ data_df.loc[data_df['trialcode'].str.contains(trial), 'trialcode'] = trial  # any trialcode containing the trial name collapses to the bare name
+ data_df.rename(columns
+ ={'latency': 'values.response_latency'}, inplace=True)
+ for col in data_df.columns.tolist():
+ if col not in total_columns:
+ total_columns.append(col)
+print sorted(total_columns)
+for t in trials:  # same computation as the earlier initiation-latency cell; relies on `new` and `all_data` defined there
+ new[t + '_initiation_latency'] = all_data[t + '_values.response_latency'].subtract(
+ all_data[t + '_values.recall_firstcharlatency'])
+# In[ ]: select the 'typing_test' block rows of each raw file (only the last file's selection remains bound afterwards)
+cols = ['typing_test_openended_sentence1', 'typing_speed_next_trial', 'typing_test_openended_sentence2',  # NOTE(review): rebinds `cols`, replacing the four latency measure names defined in an earlier cell
+ 'typing_test_error2', 'typing_test_openended_sentence2', 'typing_speed_next_trial_2',
+ 'typing_test_openended_sentence3']
+for data_file in glob(os.path.join(data_dir, '*raw.csv')):
+ df = pd.read_csv(data_file, dtype=str)
+ typing_test = df.loc[df['blockcode'] == 'typing_test']
+# for trial in cols:
+# typingtest=df.loc[df['trialcode'].str.contains(trial),'trialcode']
+# print df.loc[df['trialcode']==trial]
+# In[ ]: per raw file and subject, average each latency measure per trial (the export of the rows is commented out at the end)
+for data_file in glob(os.path.join(data_dir, '*raw.csv')):
+ data_df = pd.read_csv(data_file, dtype=str)
+ data_df.loc[data_df['response'] == ' ', 'trialcode'] = 'trial_confirmation'  # rows whose response is a single space are relabelled
+ for trial in ['trial1', 'trial2', 'trial3', 'trial4', 'trial5', 'trial6', 'trial7', 'trial8', 'listb']:
+ data_df.loc[data_df['trialcode'].str.contains(trial), 'trialcode'] = trial  # collapse trialcodes to the bare trial name
+ data_df.rename(columns={'latency': 'values.response_latency'}, inplace=True)
+ subj_list = data_df.loc[data_df['trialcode'] == 'trial8', 'subject'].unique()  # subjects that have a 'trial8' code
+ if len(subj_list) > 0:
+ data_df.loc[(data_df['trialcode'] == 'trial6') & (data_df['subject'].isin(subj_list)), 'trialcode'] = 'listb'  # for those subjects shift 6->listb, 7->6, 8->7; this order keeps any row from being relabelled twice
+ data_df.loc[(data_df['trialcode'] == 'trial7') & (data_df['subject'].isin(subj_list)), 'trialcode'] = 'trial6'
+ data_df.loc[(data_df['trialcode'] == 'trial8') & (data_df['subject'].isin(subj_list)), 'trialcode'] = 'trial7'
+ for subj, subj_df in data_df.groupby(['subject']):
+ measures = []
+ for trial, trial_df in subj_df.groupby(['trialcode']):
+ if trial in ['trial1', 'trial2', 'trial3', 'trial4', 'trial5', 'listb', 'trial6', 'trial7']:
+ trial_measures = [np.nan] * 4  # four slots
+ for idx, meas in enumerate(cols):  # NOTE(review): `cols` is rebound to seven typing-test names in the preceding cell; if the cells run top to bottom this no longer iterates the four latency measures -- confirm
+ if 'values.recall' in meas:
+ print subj, meas, trial_df['values.recall_lastcharlatency'].astype(float).subtract(
+ trial_df['values.recall_firstcharlatency'].astype(float))
+ try:
+ trial_measures[idx] = round(trial_df[meas].astype('float').mean(), 4)  # mean of the measure over the trial's rows, 4 decimals
+ # print meas,trial_measures[idx]
+ # subj_df[meas].subtract(subj_df[trial+'_values.recall_firstcharlatency'])
+ x = trial_df['values.recall_lastcharlatency'][trial_measures].subtract(  # NOTE(review): indexes a Series with the list of means; if this raises, the bare except below discards the mean just stored -- confirm intent
+ trial_df['values.recall_firstcharlatency'][trial_measures], axis=1)
+ except:
+ trial_measures[idx] = np.nan  # any failure in the try block resets this slot to NaN
+ continue
+ measures.append([trial] + trial_measures)
+ elif trial == 'trial_confirmation':
+ confirmation_mean = trial_df['values.response_latency'].astype(float).mean()
+ confirmation_vals = trial_df['values.response_latency'].astype(float)
+ subj_line = [subj, subj_df['date'].unique().astype(str)[0]]  # subject id plus the first distinct date; the code that would extend and store this row is commented out below
+# for trial in ['trial1','trial2','trial3','trial4','trial5','listb','trial6','trial7']:
+# try:
+# trial_idx=[meas[0] for meas in measures].index(trial)
+# subj_line.extend(measures[int(trial_idx)][1:])
+# except:
+# subj_line.exteend(4*np.nan)
+# continue
+# print confirmation_mean,confirmation_vals
+# final_csv.append(subj_line)
+# In[ ]:
+# In[ ]:
diff --git a/build/lib/vmreact-merges/single_inq_subj.py b/build/lib/vmreact-merges/single_inq_subj.py
new file mode 100755
index 0000000..6904796
--- /dev/null
+++ b/build/lib/vmreact-merges/single_inq_subj.py
@@ -0,0 +1,104 @@
+# coding: utf-8
+# In[ ]:
+# import os
+# import numpy as np
+# import pandas as pd
+# import csv
+# from glob import glob
+# In[ ]:
+# data_dir='/Users/lillyel-said/Desktop/vmreact/vmreact/1_rawdata/data/'
+# output_dir='/Users/lillyel-said/Desktop/vmreact/vmreact/1_rawdata/data/'
+# In[ ]:
+# trials=['trial1','trial2','trial3','trial4','trial5','listb','trial6','trial7']
+# cols=['values.response_latency', 'expressions.trial_recall_word_latency',
+# 'values.recall_firstcharlatency', 'values.recall_lastcharlatency']
+# column_titles=['subjid','date']
+# for trial in trials:
+# for meas in cols:
+# column_titles.append(trial+"_"+meas)
+# final_csv=[column_titles]
+# In[ ]:
+# total_columns=[]
+# for data_file in glob(os.path.join(data_dir,'*')):
+# data_df=pd.read_csv(data_file,dtype=str)
+# data_df.loc[data_df['response']==' ','trialcode'] = 'trial_confirmation'
+# for trial in ['trial1','trial2','trial3','trial4','trial5','trial6','trial7','trial8','listb']:
+# data_df.loc[data_df['trialcode'].str.contains(trial),'trialcode']=trial
+# data_df.rename(columns
+# ={'latency':'values.response_latency'},inplace=True)
+# for col in data_df.columns.tolist():
+# if col not in total_columns:
+# total_columns.append(col)
+# print sorted(total_columns)
+# In[ ]:
+# for data_file in glob(os.path.join(data_dir,'*.csv')):
+# data_df=pd.read_csv(data_file,dtype=str)
+# data_df.loc[data_df['response']==' ','trialcode'] = 'trial_confirmation'
+# for trial in ['trial1','trial2','trial3','trial4','trial5','trial6','trial7','trial8','listb']:
+# data_df.loc[data_df['trialcode'].str.contains(trial),'trialcode']=trial
+# data_df.rename(columns={'latency':'values.response_latency'},inplace=True)
+# subj_list=data_df.loc[data_df['trialcode'] == 'trial8','subject'].unique()
+# if len(subj_list) > 0:
+# data_df.loc[(data_df['trialcode'] == 'trial6') & (data_df['subject'].isin(subj_list)),'trialcode']='listb'
+# data_df.loc[(data_df['trialcode'] == 'trial7') & (data_df['subject'].isin(subj_list)),'trialcode']='trial6'
+# data_df.loc[(data_df['trialcode'] == 'trial8') & (data_df['subject'].isin(subj_list)),'trialcode']='trial7'
+# for subj,subj_df in data_df.groupby(['subject']):
+# measures=[]
+# for trial,trial_df in subj_df.groupby(['trialcode']):
+# if trial in ['trial1','trial2','trial3','trial4','trial5','listb','trial6','trial7']:
+# trial_measures=[np.nan]*4
+# for idx,meas in enumerate(cols):
+# try:
+# trial_measures[idx]=round(trial_df[meas].astype('float').mean(),4)
+# xnew[trial+'_'+meas]=trial_df['values.response_latency']
+# for t in trials:
+# [t+'_initiation_latency']=all_data[t+'_values.response_latency'].subtract(all_data[t+'_values.recall_firstcharlatency'])
+# except:
+# trial_measures[idx]=np.nan
+# continue
+# break
+# measures.append([trial] + trial_measures)
+# elif trial == 'trial_confirmation':
+# confirmation_mean=trial_df['values.response_latency'].astype(float).mean()
+# confirmation_vals=trial_df['values.response_latency'].astype(float)
+# subj_line=[subj,subj_df['date'].unique().astype(str)[0]]
+# for trial in ['trial1','trial2','trial3','trial4','trial5','listb','trial6','trial7']:
+# try:
+# trial_idx=[meas[0] for meas in measures].index(trial)
+# subj_line.extend(measures[int(trial_idx)][1:])
+# except:
+# subj_line.exteend(4*np.nan)
+# continue
+# print confirmation_mean,confirmation_vals
+# final_csv.append(subj_line)
diff --git a/build/lib/vmreact-merges/vmreact_compilation.py b/build/lib/vmreact-merges/vmreact_compilation.py
new file mode 100755
index 0000000..fbd2ef9
--- /dev/null
+++ b/build/lib/vmreact-merges/vmreact_compilation.py
@@ -0,0 +1,810 @@
+# coding: utf-8
+# In[2]:
+import datetime
+from glob import glob
+from shutil import copy, move
+import pandas as pd
+from IPython.display import display
+# #Grading Script
+# In[102]:
+def grader(all_subj_data_csv, data_output_raw_csv, data_output_scored_csv, word_corr, p_r):
+ # Score recall responses from an all-subject raw-data CSV against the Rey word lists.
+ # Parameters:
+ #   all_subj_data_csv      - input CSV path; the code uses its 'subject', 'trialcode'
+ #                            and 'response' columns.
+ #   data_output_raw_csv    - output CSV path: one row per recall response with its 0/1 score.
+ #   data_output_scored_csv - output CSV path: per-subject rows of per-trial scores and repeat counts.
+ #   word_corr              - output CSV path: distinct responses with their best-matching list word.
+ #   p_r                    - word-list selector: 0 = full 15-word lists, 1 = first five words of
+ #                            each list, 2 = last five words of each list. Any other value leaves
+ #                            rey_word_lists unassigned, so its first use raises NameError.
+ # Returns None; all results are written to the three output files.
+ # NOTE: csv, collections, SequenceMatcher and ceil are imported further down this module, so
+ # those imports must have run before this function is called.
+ with open(all_subj_data_csv, 'U') as file:
+ input_csv_lines_all_subj = csv.reader(file)
+ # Transpose rows into columns so that each header name maps to the list of values beneath it.
+ input_csv_lines_all_subj = map(list, zip(*input_csv_lines_all_subj))
+ all_subj_csv_lines = dict((rows[0], rows[1:]) for rows in input_csv_lines_all_subj)
+ # Collect [subject, trialcode] pairs for every row whose trialcode contains 'rey_list'.
+ subj_listtype = []
+ for idx, row in enumerate(all_subj_csv_lines['subject']):
+ if 'rey_list' in all_subj_csv_lines['trialcode'][idx]:
+ subj_listtype.append([all_subj_csv_lines['subject'][idx], all_subj_csv_lines['trialcode'][idx]])
+ # De-duplicate the pairs while keeping first-seen order.
+ set_subj_listtype = []
+ for subj in subj_listtype:
+ if subj not in set_subj_listtype:
+ set_subj_listtype.append(subj)
+ ## count per list type
+ index_number_resp = dict()
+ for list_type in sorted([x for x in set(all_subj_csv_lines['trialcode']) if 'rey_list' in x]):
+ index_number_resp[list_type] = []
+ # Gather lower-cased, stripped recall responses under the subject's list key; 'listb' responses
+ # go under that key with its last character replaced by 'b'.
+ # NOTE(review): keys exist only for 'rey_list' trialcodes present in the data, so a missing
+ # '...b' key would raise KeyError here - confirm the input always contains it.
+ for idx, response in enumerate(all_subj_csv_lines['response']):
+ if 'recall_response' in all_subj_csv_lines['trialcode'][idx]:
+ if 'listb' not in all_subj_csv_lines['trialcode'][idx]:
+ index_number_resp[
+ set_subj_listtype[[x[0] for x in set_subj_listtype].index(all_subj_csv_lines['subject'][idx])][
+ 1]].append(response.lower().strip())
+ elif 'listb' in all_subj_csv_lines['trialcode'][idx]:
+ index_number_resp[
+ set_subj_listtype[[x[0] for x in set_subj_listtype].index(all_subj_csv_lines['subject'][idx])][1][
+ :-1] + 'b'].append(response.lower().strip())
+ # Frequency of each response per list type (built here but not written out by this function).
+ counter_dict = dict()
+ for list_type in sorted(index_number_resp.keys()):
+ rey_recall_word_count = collections.Counter(index_number_resp[list_type])
+ counter_dict[list_type] = rey_recall_word_count
+ # Sorted distinct responses per list type; these feed the word-correlation file below.
+ total_response_for_list = dict()
+ for list_type in sorted(index_number_resp.keys()):
+ total_response_for_list[list_type] = sorted(set(index_number_resp[list_type]))
+ # Choose the reference word lists: full lists, or only their first / last five words.
+ if p_r == 0:
+ rey_word_lists = {
+ 'rey_list_presentation_1a': ['drum', 'curtain', 'bell', 'coffee', 'school', 'parent', 'moon', 'garden',
+ 'hat', 'farmer', 'nose', 'turkey', 'color', 'house', 'river'],
+ 'rey_list_presentation_2a': ['pipe', 'wall', 'alarm', 'sugar', 'student', 'mother', 'star', 'painting',
+ 'bag', 'wheat', 'mouth', 'chicken', 'sound', 'door', 'stream'],
+ 'rey_list_presentation_3a': ['violin', 'tree', 'scarf', 'ham', 'suitcase', 'cousin', 'earth', 'stairs',
+ 'dog', 'banana', 'town', 'radio', 'hunter', 'bucket', 'field'],
+ 'rey_list_presentation_4a': ['doll', 'mirror', 'nail', 'sailor', 'heart', 'desert', 'face', 'letter', 'bed',
+ 'machine', 'milk', 'helmet', 'music', 'horse', 'road'],
+ 'rey_list_presentation_1b': ['desk', 'ranger', 'bird', 'shoe', 'stove', 'mountain', 'glasses', 'towel',
+ 'cloud', 'boar', 'lamb', 'gun', 'pencil', 'church', 'fish'],
+ 'rey_list_presentation_2b': ['bench', 'officer', 'cage', 'sock', 'fridge', 'cliff', 'bottle', 'soap',
+ 'sky', 'ship', 'goat', 'bullet', 'paper', 'chapel', 'crab'],
+ 'rey_list_presentation_3b': ['orange', 'table', 'toad', 'corn', 'bus', 'chin', 'bleach', 'soap', 'hotel',
+ 'donkey', 'spider', 'money', 'book', 'soldier', 'padlock'],
+ 'rey_list_presentation_4b': ['dish', 'jester', 'hill', 'coat', 'tool', 'forest', 'perfume', 'ladder',
+ 'girl', 'foot', 'shield', 'pie', 'insect', 'ball', 'car']
+ }
+ elif p_r == 1:
+ rey_word_lists = {'rey_list_presentation_1a': ['drum', 'curtain', 'bell', 'coffee', 'school'],
+ 'rey_list_presentation_2a': ['pipe', 'wall', 'alarm', 'sugar', 'student'],
+ 'rey_list_presentation_3a': ['violin', 'tree', 'scarf', 'ham', 'suitcase'],
+ 'rey_list_presentation_4a': ['doll', 'mirror', 'nail', 'sailor', 'heart'],
+ 'rey_list_presentation_1b': ['desk', 'ranger', 'bird', 'shoe', 'stove'],
+ 'rey_list_presentation_2b': ['bench', 'officer', 'cage', 'sock', 'fridge'],
+ 'rey_list_presentation_3b': ['orange', 'table', 'toad', 'corn', 'bus'],
+ 'rey_list_presentation_4b': ['dish', 'jester', 'hill', 'coat', 'tool']
+ }
+ elif p_r == 2:
+ rey_word_lists = {'rey_list_presentation_1a': ['nose', 'turkey', 'color', 'house', 'river'],
+ 'rey_list_presentation_2a': ['mouth', 'chicken', 'sound', 'door', 'stream'],
+ 'rey_list_presentation_3a': ['town', 'radio', 'hunter', 'bucket', 'field'],
+ 'rey_list_presentation_4a': ['milk', 'helmet', 'music', 'horse', 'road'],
+ 'rey_list_presentation_1b': ['lamb', 'gun', 'pencil', 'church', 'fish'],
+ 'rey_list_presentation_2b': ['goat', 'bullet', 'paper', 'chapel', 'crab'],
+ 'rey_list_presentation_3b': ['spider', 'money', 'book', 'soldier', 'padlock'],
+ 'rey_list_presentation_4b': ['shield', 'pie', 'insect', 'ball', 'car']
+ }
+ # Word-correlation file: a response, its highest SequenceMatcher ratio against the list words,
+ # and the list word that produced that ratio.
+ with open(word_corr, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ for word_list in sorted(total_response_for_list.keys()):
+ word_corrs = []
+ for word in total_response_for_list[word_list]:
+ wordcorrs = [round(SequenceMatcher(None, word, x).ratio(), 3) for x in rey_word_lists[word_list]]
+ word_corrs.append(wordcorrs)
+ writer.writerow([word, max(wordcorrs), rey_word_lists[word_list][wordcorrs.index(max(wordcorrs))]])
+ csvfile.close()
+ # Map each subject to the first 'rey_list_presentation_' trialcode found among their rows.
+ subj_id_list = []
+ subj_only = []
+ for subj in sorted(set(all_subj_csv_lines['subject'])):
+ try:
+ subj_list_type = [all_subj_csv_lines['trialcode'][x] for x in range(len(all_subj_csv_lines['subject']))
+ if (all_subj_csv_lines['subject'][x] == subj) and (
+ 'rey_list_presentation_' in all_subj_csv_lines['trialcode'][x])][0]
+ subj_id_list.append([subj, subj_list_type])
+ subj_only.append(subj)
+ except:
+ # Any failure (e.g. the [0] lookup on an empty match list) is reported and the subject skipped.
+ print "%s has an error in their data" % subj
+ continue
+ # Every recall row as [subject, trialcode, lower-cased response].
+ full_raw_data_responses = [[all_subj_csv_lines['subject'][x], all_subj_csv_lines['trialcode'][x],
+ all_subj_csv_lines['response'][x].lower()]
+ for x in range(len(all_subj_csv_lines['subject']))
+ if 'recall_response' in all_subj_csv_lines['trialcode'][x]]
+ all_responses = []
+ repeats = []
+ list_b_all = []
+ list_a_all = []
+ # Score each response: 1 for an exact list word or any similarity ratio above 0.8, otherwise 0.
+ # 'listb' trials are scored against the subject's list name with its last character set to 'b'.
+ with open(data_output_raw_csv, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ writer.writerow(('subj_id', 'list_type', 'trial', 'response', 'score'))
+ for response in full_raw_data_responses:
+ subj = response[0]
+ list_to_use = [subj_id_list[x][1] for x in range(len(subj_id_list)) if subj_id_list[x][0] == subj][0]
+ list_a_all.append(list_to_use)
+ list_b = list_to_use[:-1] + 'b'
+ list_b_all.append(list_b)
+ if 'listb' in response[1]:
+ if response[2] in rey_word_lists[list_b]:
+ response.append(1)
+ else:
+ if any(n > 0.8 for n in
+ [SequenceMatcher(None, response[2], x).ratio() for x in rey_word_lists[list_b]]):
+ response.append(1)
+ else:
+ response.append(0)
+ new_row = response[0], list_b, response[1].split('_')[0], response[2], response[3]
+ else:
+ if response[2] in rey_word_lists[list_to_use]:
+ response.append(1)
+ else:
+ if any(n > 0.8 for n in
+ [SequenceMatcher(None, response[2], x).ratio() for x in rey_word_lists[list_to_use]]):
+ response.append(1)
+ else:
+ response.append(0)
+ new_row = response[0], list_to_use, response[1].split('_')[0], response[2], response[3]
+ writer.writerow(new_row)
+ all_responses.append(response)
+ rep = new_row
+ repeats.append(rep)
+ csvfile.close()
+ # Find the row indices where the trial name (trialcode text before the first '_') changes.
+ # NOTE(review): at i == 0 the comparison is against trial_lines[-1], i.e. the last row - confirm
+ # that the first and last rows never share a trial name, otherwise index 0 is not a break.
+ trial_breaks = []
+ trial_lines = [all_responses[y][1] for y in range(0, len(all_responses))]
+ trial_breaks = [i for i, x in enumerate(trial_lines[0:])
+ if x.split('_')[0] != trial_lines[i - 1].split('_')[0]]
+ trial_breaks = trial_breaks + [len(all_responses)]
+ subj_scores = []
+ final = []
+ final_repeats = []
+ # For each run of rows between consecutive breaks: count the responses scored 1 and estimate
+ # repeated words from pairwise similarity between the collected words.
+ for idx, val in enumerate(trial_breaks[:-1]):
+ score = 0
+ word_list = []
+ for line in all_responses[trial_breaks[idx]:trial_breaks[idx + 1]]:
+ if line[3] == 1:
+ score = score + 1
+ word_list.append(line[2])
+ test = []
+ for idx, word in enumerate(word_list):
+ test.append([SequenceMatcher(None, word, x).ratio() for x in
+ [y for idx2, y in enumerate(word_list) if idx != idx2]])
+ # 'repeats' is rebound here from the list used above to a running number.
+ repeats = 0
+ for word in test:
+ # Keep only ratios above 0.8; ceil turns each into 1, so n counts this word's near-duplicates.
+ word_thresholded = [ceil(x) for x in word if x > 0.8]
+ n = sum(word_thresholded)
+ if n != 0:
+ repeats = repeats + (((n * (n + 1)) - 1) / (n + 1))
+ subj_scores.append([line[0], line[1].split('_')[0], score, repeats])
+ # Scored file: entries are sorted, accumulated per subject, and flushed as one row per subject.
+ with open(data_output_scored_csv, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ writer.writerow(
+ ['subj_id', 'list_type', 'listb', 'trial1', 'trial2', 'trial3', 'trial4', 'trial5', 'trial6', 'trial7',
+ 'listb_#_repeats', 'trial1_#_repeats', 'trial2_#_repeats', 'trial3_#_repeats', 'trial4_#_repeats',
+ 'trial5_#_repeats', 'trial6_#_repeats', 'trial7_#_repeats'])
+ # Sentinel entry so the idx + 1 look-ahead below is valid for the last real entry.
+ subj_scores = subj_scores + ['placeholder']
+ for idx, scores in enumerate(sorted(subj_scores[:-1])):
+ # Reported score is the count of correct responses minus the repeat estimate.
+ scored = str(scores[2] - scores[3])
+ repeat_nm = scores[3]
+ final.append(scored)
+ final_repeats.append(repeat_nm)
+ subj_id = [scores[0]]
+ for idx2, val in enumerate(subj_id_list):
+ if subj_id[0] == subj_id_list[idx2][0]:
+ # Fourth '_'-separated token of the list trialcode, e.g. '1a'.
+ subj_list = subj_id_list[idx2][1].split('_')[3]
+ final_row = subj_id + [subj_list] + final + final_repeats
+ # Write the accumulated row once the next sorted entry belongs to a different subject.
+ if scores[0] != sorted(subj_scores)[idx + 1][0]:
+ writer.writerow(final_row)
+ final_row = []
+ subj_id = []
+ final = []
+ final_repeats = []
+ csvfile.close()
+# #demo and age range function
+# In[103]:
+def demo_and_summary(all_subj_data_csv, demographic_data, final_summary_csv, frequency_count, subj_age_agerange_gender,
+ sr_responses, summary_ant_scores):
+ # Summarise demographic and survey data for one batch and write four CSV files.
+ # Parameters:
+ #   all_subj_data_csv        - raw task CSV path (uses 'subject', 'trialcode', 'response' columns).
+ #   demographic_data         - demographics CSV path (uses 'subject', 'gender_response',
+ #                              'age_textbox_response'); the name is rebound below to the parsed columns.
+ #   final_summary_csv        - summary CSV path (uses 'script.subjectid',
+ #                              'values.end_survey_completed' and the 'expressions.*' columns read below).
+ #   frequency_count          - output path: response counts per demographic question.
+ #   subj_age_agerange_gender - output path: subject id, gender, age and matching age-range label.
+ #   sr_responses             - output path: gad_7 / phq / pcl totals for completed end surveys.
+ #   summary_ant_scores       - output path: percent correct, mean RT and RT std for completed end surveys.
+ # Returns None.
+ # NOTE: csv and collections are imported further down this module, so those imports must have
+ # run before this function is called. Several intermediate lists built below are never used
+ # again within the function.
+ # Each input CSV is transposed so that a header name maps to the list of values in its column.
+ with open(all_subj_data_csv, 'U') as file:
+ input_csv_lines_all_subj = csv.reader(file)
+ input_csv_lines_all_subj = map(list, zip(*input_csv_lines_all_subj))
+ all_subj_csv_lines = dict((rows[0], rows[1:]) for rows in input_csv_lines_all_subj)
+ with open(demographic_data, 'U') as file:
+ input_demo_sr_q_csv = csv.reader(file)
+ input_demo_sr_q_csv = map(list, zip(*input_demo_sr_q_csv))
+ demographic_data = dict((rows[0], rows[1:]) for rows in (input_demo_sr_q_csv))
+ with open(final_summary_csv, 'U') as file:
+ final_summary_lines = csv.reader(file)
+ final_summary_lines = map(list, zip(*final_summary_lines))
+ rey_summary = dict((rows[0], rows[1:]) for rows in (final_summary_lines))
+ # Age-range labels and the integer ages each one covers.
+ # NOTE(review): the bins overlap ('50-59'/'57-69' share 57-59, '70-79'/'76-89' share 76-79), so
+ # those ages match two labels and produce two output rows below - confirm this is intended.
+ age_ranges = {
+ '16-19': range(16, 20, 1),
+ '20-29': range(20, 30, 1),
+ '30-39': range(30, 40, 1),
+ '40-49': range(40, 50, 1),
+ '50-59': range(50, 60, 1),
+ '57-69': range(57, 70, 1),
+ '70-79': range(70, 80, 1),
+ '76-89': range(76, 90, 1)
+ }
+ subj_id_list_demo = []
+ subj_id_only_demo = []
+ for subject in sorted(set(all_subj_csv_lines['subject'])):
+ subj_id_only_demo.append(subject)
+ subj_id_list_combined = [demographic_data['subject'][x] for x in range(len(demographic_data['subject'])) if
+ demographic_data['subject'][x] == subject]
+ subj_id_list_demo.append(subj_id_list_combined)
+ subj_id_combined = [(idx, val) for idx, val in enumerate(sorted(subj_id_only_demo))]
+ # Flatten the demographics columns into [column name, value] pairs (values sorted per column).
+ subj_val = []
+ key_val_all = []
+ for key in sorted(demographic_data.keys()):
+ for value in sorted(demographic_data[key]):
+ key_val_all.append([key, value])
+ if 'subject' in key:
+ subj_val.append(value)
+ else:
+ continue
+ subj_id_with_index = list()
+ for subj_num in subj_val:
+ subj_combined = [[idx, val] for idx, val in enumerate(sorted(subj_id_only_demo)) if val == subj_num]
+ subj_indexvals = [[idx, val] for idx, val in enumerate(sorted(subj_id_only_demo))]
+ subj_id_with_index.append(subj_combined)
+ # Keep only columns whose name contains none of: latency, group, build, time, date.
+ new_demo_dict = dict()
+ for key_var in sorted(demographic_data.keys()):
+ if 'latency' not in key_var and 'group' not in key_var and 'build' not in key_var and 'time' not in key_var and 'date' not in key_var:
+ new_demo_dict[key_var] = []
+ for index1, val1 in enumerate(key_val_all):
+ if val1[0] in new_demo_dict.keys():
+ new_demo_dict[val1[0]].append(val1[1])
+ # Count how often each answer occurs per retained column, then write the counts out.
+ counter_demo_dict = dict()
+ for key_q in sorted(new_demo_dict.keys()):
+ answer_count = collections.Counter(new_demo_dict[key_q])
+ print answer_count
+ counter_demo_dict[key_q] = answer_count
+ with open(frequency_count, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ writer.writerow(['survey_question', 'response_counts'])
+ for key, value in sorted(counter_demo_dict.items()):
+ writer.writerow([key, value])
+ csvfile.close()
+ subj_age_gender_mem = []
+ x = []
+ for idx2, subj_id in enumerate(subj_id_only_demo):
+ subj_age_gen = [[demographic_data['subject'][x], demographic_data['gender_response'][x].lower(),
+ demographic_data['age_textbox_response'][x]] for x in range(len(demographic_data['subject']))
+ if demographic_data['subject'][x] == subj_id]
+ y = [[demographic_data['subject'][x]] for x in range(len(demographic_data['subject'])) if
+ demographic_data['subject'][x] == subj_id]
+ subj_age_gender_mem.append(subj_age_gen)
+ # [subject, lower-cased gender, age text] for every demographics row with a non-empty subject.
+ demo_subj_age_gender = [[demographic_data['subject'][x], demographic_data['gender_response'][x].lower(),
+ demographic_data['age_textbox_response'][x]]
+ for x in range(len(demographic_data['subject']))
+ if demographic_data['subject'][x]]
+ raw_data_responses = [[all_subj_csv_lines['subject'][x], all_subj_csv_lines['trialcode'][x],
+ all_subj_csv_lines['response'][x].lower()]
+ for x in range(len(all_subj_csv_lines['subject']))
+ if 'recall_response' in all_subj_csv_lines['trialcode'][x]]
+ # Expand the age ranges into [label, age] pairs for the lookup below.
+ key_val = []
+ for key in age_ranges.keys():
+ for val in age_ranges[key]:
+ key_val.append([key, val])
+ id_age_agerange = []
+ # A row is produced only where the age text equals str(age) for an age inside one of the
+ # ranges; other ages, and age text that is not exactly such a number, yield no row.
+ with open(subj_age_agerange_gender, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ writer.writerow(['subj_id', 'gender', 'age', 'age_range'])
+ for subj in sorted(demo_subj_age_gender):
+ subj_from_main_raw_list = []
+ ages = subj[2]
+ gender = subj[1]
+ subj_id_raw = [val for val in raw_data_responses if val[0] == subj[0]]
+ for vals in key_val:
+ age_vals = vals[1]
+ age_vals = str(age_vals)
+ if age_vals == ages:
+ complete_list = subj[0] + ',' + gender + "," + age_vals + "," + vals[0]
+ id_age_agerange.append(complete_list)
+ writer.writerow([subj[0], gender, age_vals, vals[0]])
+ csvfile.close()
+ subj_id_only = []
+ for subject in sorted(set(all_subj_csv_lines['subject'])):
+ subj_id_only.append(subject)
+ subj_id_memory = [subj_mem_trials for subj_mem_trials in subj_id_only]
+ subj_ids_summary = [x for x in rey_summary['script.subjectid']]
+ subj_ids_summary = sorted(subj_ids_summary)
+ summary_key_val = []
+ for key in sorted(rey_summary.keys()):
+ for value in sorted(rey_summary[key]):
+ summary_key_val.append([key, value])
+ # Summary columns minus timing/bookkeeping fields ('values.trialcount' is tested twice below).
+ new_summary_dict = dict()
+ for sum_key in sorted(rey_summary.keys()):
+ if 'script.starttime' not in sum_key and 'script.startdate' not in sum_key and 'script.elapsedtime' not in sum_key and 'values.trialcount' not in sum_key and 'values.completed' not in sum_key and 'values.trialcount' not in sum_key and 'parameters.min_validlatency' not in sum_key and 'computer.platform' not in sum_key:
+ new_summary_dict[sum_key] = []
+ for sum_idx, sum_val in enumerate(summary_key_val):
+ if sum_val[0] in new_summary_dict.keys():
+ new_summary_dict[sum_val[0]].append(sum_val[1])
+ # Only summary rows whose 'values.end_survey_completed' is '1' contribute to the two outputs below.
+ subject_summary_sr_responses = [[rey_summary['script.subjectid'][x], rey_summary['expressions.gad_7_total'][x],
+ rey_summary['expressions.phq_total'][x],
+ rey_summary['expressions.pcl_4_total'][x],
+ rey_summary['expressions.pcl_total_hybridscore_corrected'][x]] for x in
+ range(len(rey_summary['script.subjectid'])) if
+ rey_summary['values.end_survey_completed'][x] == '1']
+ subject_summary_ant_scores = [
+ [rey_summary['script.subjectid'][x], rey_summary['expressions.overallpercentcorrect'][x],
+ rey_summary['expressions.meanRT'][x], rey_summary['expressions.stdRT'][x]] for x in
+ range(len(rey_summary['script.subjectid'])) if rey_summary['values.end_survey_completed'][x] == '1']
+ with open(sr_responses, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ writer.writerow(['subj_id', 'gad_7', 'phq', 'pcl_dsm4', 'pcl_hybrid'])
+ for responses in sorted(subject_summary_sr_responses):
+ writer.writerow(responses)
+ csvfile.close()
+ with open(summary_ant_scores, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ writer.writerow(['subj_id', 'percent_correct', 'meanRT', 'stdRT'])
+ for scores in sorted(subject_summary_ant_scores):
+ writer.writerow(scores)
+ csvfile.close()
+# In[105]:
+def demo_and_summary_new(all_subj_data_csv, demographic_data, subj_age_agerange_gender):
+ # Write each subject's age, age-range label and gender using decade-wide age bins.
+ # Parameters:
+ #   all_subj_data_csv        - raw task CSV path (uses 'subject', 'trialcode', 'response' columns).
+ #   demographic_data         - demographics CSV path (uses 'subject', 'gender_response',
+ #                              'age_textbox_response'); the name is rebound below to the parsed columns.
+ #   subj_age_agerange_gender - output CSV path with columns subj_id, age, age_range, gender.
+ # Returns None.
+ # NOTE: csv is imported further down this module, so that import must have run before this
+ # function is called. Several intermediate lists built below are never used again.
+ # Each input CSV is transposed so that a header name maps to the list of values in its column.
+ with open(all_subj_data_csv, 'U') as file:
+ input_csv_lines_all_subj = csv.reader(file)
+ input_csv_lines_all_subj = map(list, zip(*input_csv_lines_all_subj))
+ all_subj_csv_lines = dict((rows[0], rows[1:]) for rows in input_csv_lines_all_subj)
+ with open(demographic_data, 'U') as file:
+ input_demo_sr_q_csv = csv.reader(file)
+ input_demo_sr_q_csv = map(list, zip(*input_demo_sr_q_csv))
+ demographic_data = dict((rows[0], rows[1:]) for rows in (input_demo_sr_q_csv))
+ # Age-range labels and the integer ages each one covers.
+ # NOTE(review): '70-90' uses range(70, 90), which stops at 89, and nothing below 20 is covered;
+ # ages outside 20-89 therefore produce no output row - confirm this is intended.
+ age_ranges = {
+ '20-29': range(20, 30, 1),
+ '30-39': range(30, 40, 1),
+ '40-49': range(40, 50, 1),
+ '50-59': range(50, 60, 1),
+ '60-69': range(60, 70, 1),
+ '70-90': range(70, 90, 1)}
+ subj_id_list_demo = []
+ subj_id_only_demo = []
+ for subject in sorted(set(all_subj_csv_lines['subject'])):
+ subj_id_only_demo.append(subject)
+ subj_id_list_combined = [demographic_data['subject'][x] for x in range(len(demographic_data['subject'])) if
+ demographic_data['subject'][x] == subject]
+ subj_id_list_demo.append(subj_id_list_combined)
+ subj_id_combined = [(idx, val) for idx, val in enumerate(sorted(subj_id_only_demo))]
+ subj_val = []
+ key_val_all = []
+ for key in sorted(demographic_data.keys()):
+ for value in sorted(demographic_data[key]):
+ key_val_all.append([key, value])
+ if 'subject' in key:
+ subj_val.append(value)
+ else:
+ continue
+ subj_id_with_index = list()
+ for subj_num in subj_val:
+ subj_combined = [[idx, val] for idx, val in enumerate(sorted(subj_id_only_demo)) if val == subj_num]
+ subj_indexvals = [[idx, val] for idx, val in enumerate(sorted(subj_id_only_demo))]
+ subj_id_with_index.append(subj_combined)
+ subj_age_gender_mem = []
+ x = []
+ for idx2, subj_id in enumerate(subj_id_only_demo):
+ subj_age_gen = [[demographic_data['subject'][x], demographic_data['gender_response'][x].lower(),
+ demographic_data['age_textbox_response'][x]] for x in range(len(demographic_data['subject']))
+ if demographic_data['subject'][x] == subj_id]
+ y = [[demographic_data['subject'][x]] for x in range(len(demographic_data['subject'])) if
+ demographic_data['subject'][x] == subj_id]
+ subj_age_gender_mem.append(subj_age_gen)
+ # [subject, lower-cased gender, age text] for every demographics row with a non-empty subject.
+ demo_subj_age_gender = [[demographic_data['subject'][x], demographic_data['gender_response'][x].lower(),
+ demographic_data['age_textbox_response'][x]]
+ for x in range(len(demographic_data['subject']))
+ if demographic_data['subject'][x]]
+ raw_data_responses = [[all_subj_csv_lines['subject'][x], all_subj_csv_lines['trialcode'][x],
+ all_subj_csv_lines['response'][x].lower()]
+ for x in range(len(all_subj_csv_lines['subject']))
+ if 'recall_response' in all_subj_csv_lines['trialcode'][x]]
+ # Expand the age ranges into [label, age] pairs for the lookup below.
+ key_val = []
+ for key in age_ranges.keys():
+ for val in age_ranges[key]:
+ key_val.append([key, val])
+ id_age_agerange = []
+ # A row is produced only where the age text equals str(age) for an age inside one of the ranges.
+ with open(subj_age_agerange_gender, 'wb') as csvfile:
+ writer = csv.writer(csvfile, delimiter=',')
+ writer.writerow(['subj_id', 'age', 'age_range', 'gender'])
+ for subj in sorted(demo_subj_age_gender):
+ subj_from_main_raw_list = []
+ ages = subj[2]
+ gender = subj[1]
+ subj_id_raw = [val for val in raw_data_responses if val[0] == subj[0]]
+ for vals in key_val:
+ age_vals = vals[1]
+ age_vals = str(age_vals)
+ if age_vals == ages:
+ complete_list = subj[0] + ',' + age_vals + "," + vals[0] + "," + gender
+ id_age_agerange.append(complete_list)
+ writer.writerow([subj[0], age_vals, vals[0], gender])
+ csvfile.close()
+# In[104]:
+import pandas
+def composite_scores(input_csv, output_csv):
+    # Derive the composite learning measures from a scored-data CSV and save them.
+    #
+    # Parameters:
+    #   input_csv  - path to a scored CSV holding 'listb' and 'trial1'..'trial7' columns.
+    #   output_csv - path the composite table is written to; one row per input row, with
+    #                the row index in the first (unnamed) column.
+    # Returns None.
+    #
+    # Measures, per input row:
+    #   total_learning           = trial1 + trial2 + trial3 + trial4 + trial5
+    #   corrected_total_learning = total_learning - 5 * trial1
+    #   learning_rate            = trial5 - trial1
+    #   proactive_interference   = trial1 - listb
+    #   retroactive_interference = trial5 - trial6
+    #   forgetting_and_retention = trial5 - trial7
+    scored_data = pandas.read_csv(input_csv)
+    print(input_csv)
+    df_trials = scored_data.loc[:, 'trial1':'trial7']
+    print(df_trials.columns.tolist())
+
+    composites = pandas.DataFrame()
+    learning_trials = ['trial1', 'trial2', 'trial3', 'trial4', 'trial5']
+    composites['total_learning'] = df_trials[learning_trials].sum(axis=1)
+    # Subtract five times the first trial. The previous code repeated the trial1 *list*
+    # five times (list * 5), which after index alignment subtracted trial1 only once.
+    composites['corrected_total_learning'] = composites['total_learning'] - 5 * df_trials['trial1']
+    composites['learning_rate'] = df_trials['trial5'] - df_trials['trial1']
+    composites['proactive_interference'] = df_trials['trial1'] - scored_data['listb']
+    composites['retroactive_interference'] = df_trials['trial5'] - df_trials['trial6']
+    composites['forgetting_and_retention'] = df_trials['trial5'] - df_trials['trial7']
+
+    # Keep the index column: the file is later read back with 'Unnamed: 0' renamed to
+    # 'subject'. The earlier transposed write was dropped because this write replaced it.
+    composites.to_csv(output_csv, header=True, index=True)
+# for scored in glob('/Users/lillyel-said/Desktop/vmreact/output/*_scored_data.csv'):
+# composite_scores(scored,scored.replace('_scored_data.csv','_composite_scores.csv'))
+# In[119]:
+import os
+import csv
+import collections
+from difflib import SequenceMatcher
+from math import ceil
+# Module-level settings. 'date' is today's date as YYYY_MM_DD and is read as a global by
+# restructure_and_regrade_all_data when it builds output file names.
+# NOTE: the name 'format' shadows the builtin of the same name for the rest of this module.
+format = "%Y_%m_%d"
+current_date = datetime.datetime.today()
+date = current_date.strftime(format)
+# Directory holding the '*raw.csv' exports (machine-specific path).
+output = '/Users/lillyel-said/Desktop/data_transfer/demo'
+def restructure_and_regrade_all_data(output, data_root='/Users/lillyel-said/Desktop/data_transfer/'):
+    # Re-grade every '*raw.csv' export found in a directory and rebuild its summaries.
+    #
+    # Parameters:
+    #   output    - directory searched for files matching '*raw.csv'.
+    #   data_root - directory containing one '<a>_<b>_<c>_inquisit' folder per export, where
+    #               <a>_<b>_<c> are the first three '_'-separated parts of the raw file name.
+    #               Inputs are read from its 'csv' subfolder and results go to its 'out'
+    #               subfolder. Defaults to the location that was previously hard-coded.
+    # Returns None.
+    #
+    # Output file names carry the module-level 'date' stamp. For each export the grader runs
+    # three times (full lists, primacy words, recency words), then composite scores are
+    # computed from the full-list scores, then the demographic summaries are written.
+    # A problem with one export's demographics is reported and the loop moves on.
+    for raw_csv in glob(os.path.join(output, '*raw.csv')):
+        batch_name = '_'.join(os.path.basename(raw_csv).split('_')[0:3]) + '_inquisit'
+        out_dir = os.path.join(data_root, batch_name, 'out')
+        csv_dir = os.path.join(data_root, batch_name, 'csv')
+        demo_data = glob(os.path.join(csv_dir, '*demographics_survey.csv'))
+        summary_data = glob(os.path.join(csv_dir, '*summary.csv'))
+        if summary_data:
+            print(summary_data[0])
+
+        # p_r selects the word lists inside grader: 0 = full, 1 = primacy, 2 = recency.
+        for suffix, p_r in (('', 0), ('_primacy', 1), ('_recency', 2)):
+            grader(raw_csv,
+                   os.path.join(out_dir, 'parsed_raw_data' + suffix + '_' + date + '.csv'),
+                   os.path.join(out_dir, 'scored_data' + suffix + '_' + date + '.csv'),
+                   os.path.join(out_dir, 'word_correlations' + suffix + '_' + date + '.csv'),
+                   p_r)
+        composite_scores(os.path.join(out_dir, 'scored_data' + '_' + date + '.csv'),
+                         os.path.join(out_dir, 'composite_scores_vakil' + '_' + date + '.csv'))
+
+        # Previously a missing summary file raised IndexError before any grading happened.
+        if not demo_data or not summary_data:
+            print('%s: demographics or summary CSV not found; skipping demographic summaries' % raw_csv)
+            continue
+        try:
+            demo_and_summary(raw_csv, demo_data[0], summary_data[0],
+                             os.path.join(out_dir, 'frequency_counts' + '_' + date + '.csv'),
+                             os.path.join(out_dir, 'subj_age_agerange_gender' + '_' + date + '.csv'),
+                             os.path.join(out_dir, 'sr_responses' + '_' + date + '.csv'),
+                             os.path.join(out_dir, 'summary_ant_scores' + '_' + date + '.csv'))
+            demo_and_summary_new(raw_csv, demo_data[0],
+                                 os.path.join(out_dir,
+                                              'subj_age_agerange_gender_new_age_bins' + '_' + date + '.csv'))
+        except Exception as err:
+            # Still best-effort, as before, but the failure is reported instead of being hidden.
+            print('%s: demographic summary failed: %s' % (raw_csv, err))
+            continue
+# In[86]:
+# rename
+# Machine-specific locations: the directory for renamed outputs, and a glob pattern matching
+# the raw-data CSV inside each batch folder.
+output_csv_location = '/Users/cdla/Desktop/scratch/vmreact/2_vmreact/'
+raw_data_csvs = '/Users/cdla/Desktop/scratch/vmreact/1_rawdata/*/*raw.csv'
+def standardize_and_rename_scored_csvs(output_csv_location, raw_data_path):
+    # Grade every raw-data CSV matched by a glob pattern and collect the results, together
+    # with copies of the matching demo and summary files, under one directory.
+    #
+    # Parameters:
+    #   output_csv_location - directory that receives all generated and copied files.
+    #   raw_data_path       - glob pattern matching the '*raw.csv' files to process. The
+    #                         second '_'-separated part of each file's parent folder name
+    #                         becomes part of the output prefix 'mturk_<part>_'.
+    # Returns None.
+    #
+    # Both arguments are now honoured. Previously they were overridden by hard-coded paths
+    # equal to the module-level defaults, so calls passing those defaults behave as before.
+    for raw_data in glob(raw_data_path):
+        demo_data = raw_data.replace('raw.csv', 'demo.csv')
+        summary_data = raw_data.replace('raw.csv', 'summary.csv')
+        prefix = 'mturk_' + os.path.basename(os.path.dirname(raw_data)).split('_')[1] + '_'
+
+        # p_r selects the word lists inside grader: 0 = full, 1 = primacy, 2 = recency.
+        for suffix, p_r in (('', 0), ('_primacy', 1), ('_recency', 2)):
+            grader(raw_data,
+                   os.path.join(output_csv_location, prefix + 'parsed_raw_data' + suffix + '.csv'),
+                   os.path.join(output_csv_location, prefix + 'scored_data' + suffix + '.csv'),
+                   os.path.join(output_csv_location, prefix + 'word_correlations' + suffix + '.csv'),
+                   p_r)
+
+        copy(demo_data, os.path.join(output_csv_location, prefix + 'demo.csv'))
+        copy(summary_data, os.path.join(output_csv_location, prefix + 'summary.csv'))
+# In[ ]:
+# Trim files matching '*scored*' in scored_dir to their first 18 fields per row. The trimmed
+# rows are written to tmp.csv in the same directory, which is then moved over the source file.
+scored_dir = '/Users/lillyel-said/Desktop/vmreact/output/'
+for scored_csv in glob(os.path.join(scored_dir, '*scored*')):
+ with open(scored_csv, 'U') as source:
+ rdr = csv.reader(source)
+ with open(os.path.join(scored_dir, 'tmp.csv'), 'wb') as result:
+ wtr = csv.writer(result)
+ for r in rdr:
+ # r[0:18] keeps at most the first 18 fields; shorter rows pass through unchanged.
+ wtr.writerow(r[0:18])
+ move(os.path.join(scored_dir, 'tmp.csv'), scored_csv)
+ print scored_csv
+# In[ ]:
+# Getting composite scores from scored
+# In[ ]:
+# Directory expected to hold the per-batch 'mturk_batch<N>_*.csv' files (machine-specific path).
+scored_dir = '/Users/lillyel-said/Desktop/vmreact/output/'
+# defining all dataframes from scored data
+def set_scored_to_df(scored_dir):
+ # Collect the demographics and end-survey column names found in the batch CSV files.
+ # Parameters:
+ #   scored_dir - directory containing, for batch numbers 1 to 8, the files
+ #                'mturk_batch<N>_demo.csv', '_end.csv', '_summary.csv', '_scored_data.csv',
+ #                '_composite_scores.csv' and '_age_range_gender.csv'.
+ # Returns the tuple (demo_cols, clin_raw_cols), each de-duplicated through set(), so the
+ # order of the names is not preserved.
+ # The other column lists and the dataframes read below are not returned.
+ demo_cols = []
+ clin_raw_cols = []
+ sum_cols = ['script.startdate', 'script.starttime', 'subject',
+ 'expressions.gad_7_total', 'expressions.phq_total', 'expressions.pcl_4_total',
+ 'expressions.pcl_total_hybridscore_corrected', 'expressions.pcl_total_hybridscore_uncorrected']
+ scored_cols = ['subj_id', 'list_type', 'listb', 'trial1', 'trial2', 'trial3',
+ 'trial4', 'trial5', 'trial6', 'trial7', 'listb_#_repeats', 'trial1_#_repeats', 'trial2_#_repeats',
+ 'trial3_#_repeats', 'trial4_#_repeats', 'trial5_#_repeats', 'trial6_#_repeats', 'trial7_#_repeats']
+ composite_cols = ['subject', 'total_learning', 'corrected_total_learning', 'learning_rate',
+ 'proactive_interference', 'retroactive_interference', 'forgetting_and_retention']
+ age_range_gender_cols = ['age_range']
+ for batch in range(1, 9):
+ batch = str(batch)
+ demo = os.path.join(scored_dir, 'mturk_batch' + batch + '_demo.csv')
+ clin_raw = os.path.join(scored_dir, 'mturk_batch' + batch + '_end.csv')
+ summ = os.path.join(scored_dir, 'mturk_batch' + batch + '_summary.csv')
+ scored = os.path.join(scored_dir, 'mturk_batch' + batch + '_scored_data.csv')
+ composite = os.path.join(scored_dir, 'mturk_batch' + batch + '_composite_scores.csv')
+ age_range_gender_csv = os.path.join(scored_dir, 'mturk_batch' + batch + '_age_range_gender.csv')
+ demo_df = pd.read_csv(demo, dtype=str)
+ # demo_cols.extend([x for x in demo_df.columns.tolist() if ('latency' not in x and 'online' not in x and 'Unnamed' not in x and 'time_comp' not in x and 'subj_id' not in x)])
+ # Keep demographics columns whose name contains none of: latency, Unnamed, subj_id.
+ # NOTE(review): the final operand 'age_textbox' is a bare non-empty string, which is always
+ # true and therefore filters nothing - possibly "'age_textbox' not in x" was meant; confirm.
+ demo_cols.extend([x for x in demo_df.columns.tolist() if
+ ('latency' not in x and 'Unnamed' not in x and 'subj_id' not in x and 'age_textbox')])
+ print batch
+ age_range_df = pd.read_csv(age_range_gender_csv)
+ age_range_gender_cols.extend(
+ [x for x in age_range_df.columns.tolist() if ('age' not in x and 'subj_id' not in x and 'gender' not in x)])
+ clin_raw_df = pd.read_csv(clin_raw, dtype=str)
+ # Keep end-survey columns whose name contains none of: latency, end, Unnamed.
+ clin_raw_cols.extend(
+ [x for x in clin_raw_df.columns.tolist() if 'latency' not in x and 'end' not in x and 'Unnamed' not in x])
+ sum_df = pd.read_csv(summ, dtype=str)
+ scored_df = pd.read_csv(scored, dtype=str)
+ comp_df = pd.read_csv(composite, dtype=str).rename(index=str, columns={'Unnamed: 0': 'subject'})
+ age_range_gender = pd.read_csv(age_range_gender_csv, dtype=str)
+ demo_cols = list(set(demo_cols))
+ clin_raw_cols = list(set(clin_raw_cols))
+ return demo_cols, clin_raw_cols
+# need to get latency values,
+# use the scored to set the subject ids.
+# append composite to scored_cols since they're in the same order and composite doesn't have subject ids
+# summary - use script.subjectid
+# demo - use subject
+# clin_raw - use subject
+# In[ ]:
+import numpy as np
+# Machine-specific locations: the batch CSV directory, and the latency summary CSV inside it.
+scored_dir = '/Users/lillyel-said/Desktop/vmreact/vmreact/2_vmreact/'
+latency_csv = os.path.join(scored_dir, 'vmreact_latency_summary.csv')
+def batch_merge(scored_dir, latency_csv):
+ for batch in range(1, 9):
+ # for batch in [8]:
+ batch_df = pd.DataFrame()
+ batch = str(batch)
+ print 'mturk_batch' + batch
+ demo = os.path.join(scored_dir, 'mturk_batch' + batch + '_demo.csv')
+ clin_raw = os.path.join(scored_dir, 'mturk_batch' + batch + '_end.csv')
+ sum = os.path.join(scored_dir, 'mturk_batch' + batch + '_summary.csv')
+ scored = os.path.join(scored_dir, 'mturk_batch' + batch + '_scored_data.csv')
+ primacy = os.path.join(scored_dir, 'mturk_batch' + batch + '_scored_data_primacy.csv')
+ recency = os.path.join(scored_dir, 'mturk_batch' + batch + '_scored_data_recency.csv')
+ composite = os.path.join(scored_dir, 'mturk_batch' + batch + '_composite_scores.csv')
+ demo_df = pd.read_csv(demo, dtype=str)
+ clin_raw_df = pd.read_csv(clin_raw, dtype=str)
+ sum_df = pd.read_csv(sum, dtype=str).rename(index=str, columns={'script.subjectid': 'subject'})
+ scored_df = pd.read_csv(scored)
+ primacy_df = pd.read_csv(primacy, dtype=str)
+ recency_df = pd.read_csv(recency, dtype=str)
+ extra_measures = primacy_df.merge(recency_df, on='subj_id', left_index=True, how='left',
+ suffixes=('_primacy', '_recency')).rename(columns={'subj_id': 'subject'})
+ comp_df = pd.read_csv(composite).rename(index=str, columns={'Unnamed: 0': 'subject'})
+ comp_df['subject'] = comp_df['subject'].apply(int)
+ vmreact_df = pd.merge(scored_df, comp_df, left_index=True, right_on='subject', how='left').drop('subject',
+ axis=1)
+ vmreact_df['subj_id'] = vmreact_df['subj_id'].astype(str)
+ # vmreact_df['subj_id']=vmreact_df['subj_id'].apply(pd.to_numeric)
+ latency_df = pd.read_csv(latency_csv, dtype=str)
+ latency_df = latency_df.drop_duplicates().reset_index()
+ subject_ids = vmreact_df['subj_id'].tolist()
+ vmreact_df = vmreact_df.merge(extra_measures, left_on='subj_id', right_on='subject').drop('subject', axis=1)
+ batch_demo_cols = [x for x in demo_df.columns.tolist() if x in demo_cols]
+ append_demo_cols = [x for x in demo_cols if x not in demo_df.columns.tolist()]
+ demo_df = demo_df[demo_df['subject'].astype(str).isin(subject_ids)][batch_demo_cols]
+ for col in append_demo_cols:
+ demo_df[col] = np.nan
+ # print demo_df
+ # demo_df['subject']=demo_df['subject'].apply(pd.to_numeric)
+ batch_clin_cols = [x for x in clin_raw_df.columns.tolist() if x in clin_raw_cols]
+ append_clin_cols = [x for x in clin_raw_cols if x not in clin_raw_df.columns.tolist()]
+ clin_raw_df = clin_raw_df[clin_raw_df['subject'].astype(str).isin(subject_ids)][batch_clin_cols]
+ for col in sorted(append_clin_cols):
+ clin_raw_df[col] = np.nan
+ # clin_raw_df['subject']=clin_raw_df['subject'].apply(pd.to_numeric)
+ batch_sum_cols = [x for x in sum_df.columns.tolist() if x in sum_cols]
+ append_sum_cols = [x for x in sum_cols if x not in sum_df.columns.tolist()]
+ sum_df = sum_df[sum_df['subject'].astype(str).isin(subject_ids)][batch_sum_cols]
+ for col in sorted(append_sum_cols):
+ sum_df[col] = np.nan
+ # sum_df['subject']=sum_df['subject'].apply(pd.to_numeric)
+ batch_df = demo_df.merge(sum_df, left_on='subject', right_on='subject').drop(
+ ['script.startdate', 'script.starttime'], axis=1)
+ batch_df = batch_df.merge(clin_raw_df, left_on='subject', right_on='subject').drop(
+ ['date_y', 'time_y', 'group_y', 'build_y'], axis=1)
+ batch_df = batch_df.merge(vmreact_df, left_on='subject', right_on='subj_id').drop('subj_id', axis=1)
+ batch_df = batch_df.rename(columns={'date_x': 'date', 'time_x': 'time', 'group_x': 'group', 'build_x': 'build'})
+ # print batch_df
