-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathparallel-structure.sh
115 lines (82 loc) · 3.26 KB
/
parallel-structure.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env bash
# "parallel-structure.sh"
# Author: Alexander Koc <[email protected]>
# Version: 0.6 <2021-04-13>
# Description: Wrapper around structure allowing for parallel runs of multiple values of MAXPOP/K and replicates
# Usage:
# $ bash parallel-structure.sh path/to/mainparams path/to/extraparams path/to/input_data path/to/output_directory min_K max_K nbr_reps nbr_parallel_jobs
# Print the help text (script name, author, version, description and the
# expected command line) to stdout.
function print_usage {
    printf '%s\n' \
        "" \
        "parallel-structure.sh" \
        "" \
        "Author: Alexander Koc <[email protected]>" \
        "Version: 0.6 <2021-04-13>" \
        "Description: Wrapper around structure allowing for parallel runs of multiple values of MAXPOP/K and replicates" \
        ""
    printf '\t%s\n' \
        "Usage:" \
        '$ bash parallel-structure.sh path/to/mainparams path/to/extraparams path/to/input_data path/to/output_directory min_K max_K nbr_reps nbr_parallel_jobs'
    printf '%s\n' \
        "" \
        "For more instructions/examples, please refer to: https://github.com/alkc/parallel-structure" \
        ""
}

# Called without arguments: show the help text and leave successfully.
if [ "$#" -eq 0 ]; then
    print_usage
    exit 0
fi

# The script reads eight positional arguments ($1..$8). With fewer, the
# remaining parameters would be empty and the run would misbehave later on,
# so stop here with a clear message instead.
if [ "$#" -lt 8 ]; then
    echo "[ERROR] Expected 8 arguments but got $#." >&2
    print_usage >&2
    exit 1
fi
# Positional arguments: structure parameter files, input data and output folder.
mainparams="$1"
extraparams="$2"
inputdata="$3"
outputfolder="$4"
# Range of MAXPOP/K values to run and number of replicates per K.
k_min="$5"
k_max="$6"
n_reps="$7"
# Script parameters:
n_parallel_jobs="$8"
# Build both series as real arrays (one element per value). They are expanded
# later as "${k_series[@]}" and "${rep_series[@]}", which only yields one
# argument per value when the variables are arrays; a plain $(seq ...) string
# would be passed on as a single newline-joined argument.
# NOTE: mapfile needs bash >= 4.
mapfile -t k_series < <(seq "$k_min" 1 "$k_max")
mapfile -t rep_series < <(seq 1 1 "$n_reps")
# Helper functions:
# Abort the script unless the given path is an existing regular file.
# Arguments: $1 - path to check
# Outputs:   an "[ERROR] ..." line on stderr when the file is missing
# Returns:   0 when the file exists; otherwise exits the shell with status 1
function file_exists {
    # local: keep the path out of the global namespace of the calling script.
    local path="$1"
    if [[ ! -f "$path" ]]; then
        echo "[ERROR] $path does not exist." >&2
        exit 1
    fi
}
# Run one structure job for a single K / replicate combination.
#
# A private copy of the extraparams file is prepared for the job: every line
# mentioning RANDOMIZE is dropped, and "#define SEED <seed>" plus
# "#define RANDOMIZE 0" are appended, so each job runs with its own seed.
# The original extraparams file is never modified.
#
# Arguments: $1 - mainparams file
#            $2 - extraparams file (used as a template)
#            $3 - input data file
#            $4 - output folder
#            $5 - K (MAXPOP) value for this job
#            $6 - replicate number for this job
# Outputs:   structure results under <output folder>/K<k>_REP<rep>; structure's
#            stdout in <output folder>/K<k>_REP<rep>.output.txt
# Returns:   the exit status of structure (1 if the temp file cannot be created)
function run_structure {
    local mainparams="$1"
    local extraparams="$2"
    local inputdata="$3"
    local outputfolder="$4"
    local curr_k="$5"
    local curr_rep="$6"
    local rand_seed="$RANDOM"
    local run_prefix="${outputfolder}/K${curr_k}_REP${curr_rep}"
    local extraparams_tmp
    local status

    # mktemp gives a unique, safely created file; a hand-built /tmp name could
    # collide between concurrent jobs or be pre-created by someone else.
    if ! extraparams_tmp=$(mktemp "${TMPDIR:-/tmp}/extraparams_K${curr_k}_REP${curr_rep}_XXXXXX"); then
        echo "[ERROR] Could not create a temporary extraparams file." >&2
        return 1
    fi

    # Filter while copying instead of editing in place, which avoids the
    # non-portable "sed -i". The leading newline in the printf terminates a
    # last template line that lacks a trailing newline.
    {
        sed -e '/RANDOMIZE/d' -- "$extraparams"
        printf '\n#define SEED %s\n#define RANDOMIZE 0\n' "$rand_seed"
    } > "$extraparams_tmp"

    structure -m "$mainparams" -e "$extraparams_tmp" -i "$inputdata" -o "$run_prefix" -K "$curr_k" > "${run_prefix}.output.txt"
    status=$?

    # Always clean up, then report structure's status rather than rm's so the
    # caller can tell failed jobs from successful ones.
    rm -f -- "$extraparams_tmp"
    return "$status"
}
# Exported so that shells spawned by GNU parallel can call the function.
export -f run_structure
# Script start: report the settings, validate the inputs, then fan the
# structure runs out over GNU parallel.
echo "[START] Hello there."
echo "[INFO] Script running with following parameters:"
echo "[INFO] Input data: ${inputdata}"
file_exists "$inputdata"
echo "[INFO] Main parameter file: ${mainparams}"
file_exists "$mainparams"
echo "[INFO] Extra params file: ${extraparams}"
file_exists "$extraparams"
# run_structure works on a temporary copy of the extraparams file; the file
# named above is only read.
echo "[INFO] BTW. This file will be modified to add a random seed for each run. Custom seeds or RANDOMIZE=1 will be overriden."
echo "[INFO] Output set to be saved in: ${outputfolder}"
if [ ! -d "$outputfolder" ]; then
    echo "[INFO] Creating directory: $outputfolder"
    if ! mkdir -p -- "$outputfolder"; then
        echo "[ERROR] Could not create directory: $outputfolder" >&2
        exit 1
    fi
fi
echo "[INFO] Script will run ${n_parallel_jobs} jobs."
echo "[INFO] Using Testing MAXPOP/K from K = ${k_min}-${k_max}"
echo "[INFO] $n_reps reps per K"

# Fail early if the external programs the run depends on are missing.
for required_tool in parallel structure; do
    if ! command -v "$required_tool" > /dev/null 2>&1; then
        echo "[ERROR] Required program '$required_tool' was not found in PATH." >&2
        exit 1
    fi
done

echo "[INFO] Buckle up. Starting the structure run in parallel."
# Execute structure in parallel, ${n_parallel_jobs} jobs at a time, once per
# (K, replicate) combination. The four paths are passed as single-value input
# sources ({1}..{4}) rather than being pasted into the command template:
# GNU parallel shell-quotes replacement strings, so paths containing spaces
# reach run_structure as one argument each. {5} is K and {6} is the replicate.
parallel --progress -j "${n_parallel_jobs}" run_structure {1} {2} {3} {4} {5} {6} \
    ::: "${mainparams}" ::: "${extraparams}" ::: "${inputdata}" ::: "${outputfolder}" \
    ::: "${k_series[@]}" ::: "${rep_series[@]}"
parallel_status=$?
if [ "$parallel_status" -ne 0 ]; then
    echo "[WARN] GNU parallel exited with status ${parallel_status}; some structure runs may have failed." >&2
fi
echo "[INFO] All structure runs completed. (... Alternatively, all structure runs exited with errors)."
echo "[EXIT] Bye."
# Hand parallel's status to the caller so failures are visible to wrappers.
exit "$parallel_status"