-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathblca_multiscreen.sh
114 lines (82 loc) · 3.24 KB
/
blca_multiscreen.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/bin/bash
# Splits a FASTA file into pieces and hands each piece to screen_command.sh in
# its own detached screen session, then merges the per-piece results.
#
# Arguments:
#   $1 - path to the input file (extension must be .fasta or .fa)
#   $2 - number of split files (positive integer)
# Exit status: 0 after printing usage on request, 1 on invalid arguments.

numeric='^[0-9]+$' # regex: one or more decimal digits
source conda.sh # CHANGE to path of conda.sh
conda activate BLCA_env # CHANGE if environment name is different

### validate arguments ----
correct_usage="Usage: $0 [path to base file] [number of split files]"

# An explicit help request, or fewer than two arguments, just prints the usage.
if [[ "$*" == "--help" || "$*" == "-h" || $# -le 1 ]]; then
  echo "$correct_usage"
  exit 0
fi

base_name=$(basename -- "$1")
# Text after the last '.'; the case is kept exactly as typed.
ext="${base_name##*.}"
# Everything before the last '.'. Later steps build file names as
# <pref>.<index>.<ext>, so for "a.b.fasta" the prefix has to be "a.b"; taking
# only the first dot-separated field ("a") would never match those files.
pref="${base_name%.*}"

if [[ "$ext" != "fasta" && "$ext" != "fa" ]]; then
  # The extension is passed as an argument, never as part of the format string.
  printf 'Error: supply the path to a file with one of the following extensions: .fasta, .fa. \n%s was provided.\n' "$ext" >&2
  echo "$correct_usage" >&2
  exit 1
elif ! [[ $2 =~ $numeric ]]; then
  echo "Error: supply numeric as second argument. $2 was provided." >&2
  echo "$correct_usage" >&2
  exit 1
elif ! [[ $2 =~ [1-9] ]]; then
  # All digits but no non-zero digit means the count is zero.
  echo "Error: number of split files must be at least 1. $2 was provided." >&2
  echo "$correct_usage" >&2
  exit 1
fi
### ----
# Resolve the input location and create the BLCA_output directory tree
# (BLCA_output/, BLCA_output/files/, BLCA_output/logs/) next to the input file.
base_filepath=$1 # path to the provided file, exactly as given on the command line

if [[ ! -f "$base_filepath" ]]; then
  printf 'Error: input file not found: %s\n' "$base_filepath" >&2
  exit 1
fi

# Absolute directory that houses the provided file, always ending in '/'.
# "${base_filepath%/*}" is not used because it leaves an argument without any
# slash (e.g. "reads.fasta") unchanged, turning the file name into the
# "directory". Resolving to an absolute path also keeps the value valid after
# the script later changes its working directory. The cd runs inside a command
# substitution, so the script's own working directory is not affected.
if ! input_dir=$(CDPATH='' cd -- "$(dirname -- "$base_filepath")" && pwd); then
  printf 'Error: cannot access the directory of %s\n' "$base_filepath" >&2
  exit 1
fi
base_filedir="${input_dir%/}/"

# Force base 10 so that a value with leading zeros (e.g. "08") is not read as octal.
declare -i num_files=$((10#$2))

printf 'Path to input file: %s\n' "$base_filepath"

# main folder for output
output_dir="${base_filedir}BLCA_output"
if [[ -e "$output_dir" ]]; then
  printf "The folder <<BLCA_output>> already exists. Please rename or move this folder to avoid conflicts.\n"
  exit 1
fi
# files/ holds the BLCA files, logs/ holds the screen logfiles
if ! mkdir -- "$output_dir" "$output_dir/files" "$output_dir/logs"; then
  printf 'Error: could not create the output folders under %s\n' "$output_dir" >&2
  exit 1
fi
# Split the input into $num_files pieces. The pieces are expected next to the
# input file, named <pref>.<index>.<ext>; each one is moved into
# BLCA_output/files and its new path is recorded in file_names[index].
pyfasta split -n "$num_files" "$base_filepath"

file_names=()
split_dest="${base_filedir}BLCA_output/files"
# NOTE(review): pyfasta appears to zero-pad the index to the number of digits
# in the requested file count - confirm against the pyfasta split naming rules.
pad_width=${#num_files}
for ((i = 0; i < num_files; i++)); do
  # Candidate names: the two-digit index this script has always assumed first,
  # then the index padded to the digit count of num_files.
  printf -v two_digit_name '%s.%02d.%s' "$pref" "$i" "$ext"
  printf -v n_digit_name '%s.%0*d.%s' "$pref" "$pad_width" "$i" "$ext"

  split_name=""
  for candidate in "$two_digit_name" "$n_digit_name"; do
    if [[ -f "${base_filedir}${candidate}" ]]; then
      split_name=$candidate
      break
    fi
  done

  # Stop here instead of launching tasks on files that do not exist.
  if [[ -z "$split_name" ]]; then
    printf 'Error: split file %d not found (looked for %s and %s in %s)\n' \
      "$i" "$two_digit_name" "$n_digit_name" "$base_filedir" >&2
    exit 1
  fi

  if ! mv -- "${base_filedir}${split_name}" "$split_dest/"; then
    printf 'Error: could not move %s into %s\n' "$split_name" "$split_dest" >&2
    exit 1
  fi
  file_names[i]="$split_dest/$split_name"
done
printf '\nSplit files are in %sBLCA_output/files\n' "$base_filedir"
# Start one detached screen session per split file. Session <n> is named
# BLCA_task_<n>, writes its log to BLCA_output/logs/out_BLCA_task_<n>.txt and
# runs screen_command.sh with the n-th entry of file_names as its argument.
# CHANGE location of screen_command.sh if necessary
task=0
while ((task < num_files)); do
  session="BLCA_task_$task"
  screen -d -m -S "$session" -L \
    -Logfile "$base_filedir/BLCA_output/logs/out_$session.txt" \
    screen_command.sh "${file_names[task]}"
  ((task += 1))
done

# Poll once per second until the screen session list no longer contains any
# name with "BLCA_task_" in it.
tasks_finished=0
printf "\nWaiting for all tasks to finish...\n"
until ((tasks_finished == 1)); do
  if ! screen -list | grep -q "BLCA_task_"; then
    tasks_finished=1
  fi
  sleep 1
done
# Collect the results under BLCA_output. After this cd every path is relative
# to the input directory, so the steps below behave the same whether
# $base_filedir is absolute or relative. A failed cd aborts the script rather
# than letting the globs below run in the wrong directory.
if ! cd -- "$base_filedir"; then
  printf 'Error: cannot change directory to %s\n' "$base_filedir" >&2
  exit 1
fi

# Move the *.<ext>.gdx and *.<ext>.flat files left beside the input into the
# files folder (presumably index files written by pyfasta - confirm).
mv -- *."$ext".gdx BLCA_output/files
mv -- *."$ext".flat BLCA_output/files

if ! cd BLCA_output; then
  printf 'Error: cannot change directory to %sBLCA_output\n' "$base_filedir" >&2
  exit 1
fi

# concatenate individual task files into
# combined BLCA results for the entire input file
cat -- files/"$pref".*."$ext".blastn > combined.fa.blastn
cat -- files/"$pref".*."$ext".blca.out > combined.fa.blca.out

# Count the entries of files/ with a glob instead of parsing ls output.
generated=(files/*)
if [[ ! -e "${generated[0]}" ]]; then
  generated=() # the glob matched nothing and was left as a literal pattern
fi

# output info to general log
{
  printf 'file processed: %s\n\n' "$pref"
  printf 'number of sequences processed / BLCA output file:\n'
  wc -l combined.fa.blca.out
  printf '\nnumber of files generated by BLCA:\n'
  printf '%s\n' "${#generated[@]}"
} >> BLCA_log.txt

printf '\nTasks complete. All files are located in %sBLCA_output/\n' "$base_filedir"