-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathphone_diary_viz.sh
88 lines (73 loc) · 3.28 KB
/
phone_diary_viz.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/bin/bash
# Send everything this script prints to both the console and viz.log.
# viz.log is opened in append mode (-a) relative to the directory the script is
# launched from; -i makes tee ignore interrupt signals.
exec > >(tee -ia viz.log)
exec 2> >(tee -ia viz.log >&2)
# This script generates the relevant visualizations (as well as intermediate
# outputs such as the compiled study-wide distribution) for both the audio and
# transcript sides of the pipeline.
# Start by resolving the absolute path of the directory this script lives in,
# which is the top level of the repo. That way the script works wherever the
# repo is downloaded, rather than relying on hard-coded install paths.
# Both expansions are quoted (and preceded by --) so that a checkout path
# containing spaces or glob characters is passed through as a single argument.
full_path=$(realpath -- "$0")
repo_root=$(dirname -- "$full_path")
# Export the repo path so scripts called from here can use it too - it is unset
# again at the end of the script.
export repo_root
# Gather user settings: ask which study the code should run on - this is
# currently the only setting for the viz side.
# (In future this should be readable from a config file so the code can run
# with no user intervention - the hold-up right now is password handling.)
echo "Study of interest?"
echo "(should match PHOENIX study name, validated options are BLS and DPBPD)"
# -r keeps any backslash in the typed name literal instead of treating it as an
# escape character.
read -r study
# Sanity check that the study folder is real at least. The data root must be
# reachable first: if this cd were allowed to fail silently, the directory test
# below would run against whatever directory the user launched the script from.
if ! cd /data/sbdp/PHOENIX/PROTECTED; then
  echo "cannot access /data/sbdp/PHOENIX/PROTECTED" >&2
  exit 1
fi
# An empty answer (or end-of-input) is rejected along with any name that is not
# a directory under the data root. Exit non-zero so callers can see the failure.
if [[ -z "$study" || ! -d "$study" ]]; then
  echo "invalid study id" >&2
  exit 1
fi
# Switch to the study folder for the first loop over the patient list.
cd "$study" || exit 1
# Make study an environment variable for the bash scripts called throughout
# this script - it is unset again at the end.
export study
# Prints the current wall-clock time followed by a blank line, for runtime
# tracking between stages.
report_time() {
  local stamp
  stamp=$(date +"%T")
  echo "Current time: ${stamp}"
  echo ""
}

# Runs one stage of the pipeline: prints the stage title ($1), executes the
# remaining arguments as the stage command, then prints a blank line and the
# current time. The command's exit status is not inspected, so later stages run
# regardless of earlier outcomes.
run_stage() {
  local title=$1
  shift
  echo "$title"
  "$@"
  echo ""
  report_time
}

# Announce the run and record the starting time.
echo ""
echo "Beginning script - diary visualization generation for:"
echo "$study"
echo ""
report_time

# Distributions first - per patient and for the overall study.
# Feature distributions (OpenSMILE and NLP) are also generated here, including
# the OpenSMILE summary operation.
run_stage "Generating QC and feature distributions with histograms" \
  bash "$repo_root"/individual_modules/run_distribution_plots.sh

# Heatmaps showing the progression of select audio and transcript QC features
# over time per patient (each diary is one block).
# (Could also propose alternative dot plots?)
run_stage "Generating QC heatmaps for each patient" \
  bash "$repo_root"/individual_modules/run_heatmap_plots.sh

# Sentiment-colored wordclouds for the transcripts.
run_stage "Generating sentiment-colored wordclouds for each available transcript" \
  bash "$repo_root"/individual_modules/run_wordclouds.sh

# Finally, correlation matrices for the study-wide distributions.
# There is no need to loop over patients or do any other bash preprocessing
# here, so the python script is called directly.
run_stage "Creating study-wide correlation matrices" \
  python "$repo_root"/individual_modules/functions_called/phone_diary_correlations.py "$study"
# Wrap-up: drop the variables that were exported earlier so they do not leak
# into anything run afterwards, then report completion.
unset study repo_root
echo "Script completed"