Skip to content

Commit

Permalink
#6 Adding advanced options functionality to each config file and wrapper
Browse files Browse the repository at this point in the history
  • Loading branch information
Samuel Hamann committed Mar 6, 2020
1 parent 6e626f9 commit c664bc7
Show file tree
Hide file tree
Showing 17 changed files with 204 additions and 81 deletions.
5 changes: 5 additions & 0 deletions Configuration_Files/Abbababa_Config
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,8 @@ DO_ABBABABA=1
REMOVE_TRANS=0
# Set the size for each block
BLOCKSIZE=1000

# For advanced users who want to change arguments used by ANGSD (e.g. which SAF method is used)
# Expected format is the same as how the flags and arguments are written on the command line:
# '-flag1 arg1 -flag2 arg2 ...'
ADVANCED_ARGS=''
5 changes: 5 additions & 0 deletions Configuration_Files/Ancestral_Sequence_Config
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,8 @@ DO_FASTA=1
# If DO_FASTA is 2, DO_COUNTS must be 1
# Otherwise, DO_COUNTS can be any other legal value
DO_COUNTS=0

# For advanced users who want to change arguments used by ANGSD (e.g. which SAF method is used)
# Expected format is the same as how the flags and arguments are written on the command line:
# '-flag1 arg1 -flag2 arg2 ...'
ADVANCED_ARGS=''
5 changes: 5 additions & 0 deletions Configuration_Files/FST_Config
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,8 @@ GLOBAL=true
WIN=1000
# Set the step size for sliding window analysis
STEP=500

# For advanced users who want to change arguments used by ANGSD (e.g. which SAF method is used)
# Expected format is the same as how the flags and arguments are written on the command line:
# '-flag1 arg1 -flag2 arg2 ...'
ADVANCED_ARGS=''
5 changes: 5 additions & 0 deletions Configuration_Files/Genotype_Config
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,8 @@ POST_CUTOFF=0.95
SNP_PVAL=1e-6
# Output genotype likelihood frequency file
DO_GLF=2

# For advanced users who want to change arguments used by ANGSD (e.g. which SAF method is used)
# Expected format is the same as how the flags and arguments are written on the command line:
# '-flag1 arg1 -flag2 arg2 ...'
ADVANCED_ARGS=''
5 changes: 5 additions & 0 deletions Configuration_Files/Inbreeding_Coefficients_Config
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,8 @@ DO_GLF=3
# Set a seed value for creating approximate inbreeding coefficients
# Use the random number generator built into BASH
SEED=$RANDOM

# For advanced users who want to change arguments used by ANGSD (e.g. which SAF method is used)
# Expected format is the same as how the flags and arguments are written on the command line:
# '-flag1 arg1 -flag2 arg2 ...'
ADVANCED_ARGS=''
5 changes: 5 additions & 0 deletions Configuration_Files/Principal_Component_Analysis_Config
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,8 @@ NORM=0
CALL=0
# Set the maximum number of sites to use for Principal Component Analysis
N_SITES=100000

# For advanced users who want to change arguments used by ANGSD (e.g. which SAF method is used)
# Expected format is the same as how the flags and arguments are written on the command line:
# '-flag1 arg1 -flag2 arg2 ...'
ADVANCED_ARGS=''
5 changes: 5 additions & 0 deletions Configuration_Files/Site_Frequency_Spectrum_Config
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,8 @@ DO_POST=1
DO_SAF=2
# Overwrite any previously generated results
OVERRIDE=true

# For advanced users who want to change arguments used by ANGSD (e.g. which SAF method is used)
# Expected format is the same as how the flags and arguments are written on the command line:
# '-flag1 arg1 -flag2 arg2 ...'
ADVANCED_ARGS=''
5 changes: 5 additions & 0 deletions Configuration_Files/Thetas_Config
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,8 @@ SLIDING_WINDOW=false
WIN=1000
# Set the step size for sliding window analysis
STEP=500

# For advanced users who want to change arguments used by ANGSD (e.g. which SAF method is used)
# Expected format is the same as how the flags and arguments are written on the command line:
# '-flag1 arg1 -flag2 arg2 ...'
ADVANCED_ARGS=''
19 changes: 10 additions & 9 deletions Wrappers/Abbababa.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ then
Rscript "${SOURCE}"/Wrappers/sortRegions.R "${REGIONS}" "${FAI}"
REGIONS="$(find $(dirname ${REGIONS}) -name "*_sorted.txt")"
echo "Running Abbababa" >&2
"${ANGSD_DIR}"/angsd \
-doAbbababa "${DO_ABBABABA}" \
WRAPPER_ARGS=$(echo -doAbbababa "${DO_ABBABABA}" \
-rmTrans "${REMOVE_TRANS}" \
-blockSize "${BLOCKSIZE}" \
-doCounts "${DO_COUNTS}" \
Expand All @@ -60,13 +59,12 @@ then
-minInd "${MIN_IND}" \
-nThreads "${N_CORES}" \
-rf "${REGIONS}" \
-out "${OUT}"/"${PROJECT}".D
-out "${OUT}"/"${PROJECT}".D)
# Are we missing a definition for regions?
elif [[ -z "${REGIONS}" ]]
then
echo "Running Abbababa" >&2
"${ANGSD_DIR}"/angsd \
-doAbbababa "${DO_ABBABABA}" \
WRAPPER_ARGS=$(echo -doAbbababa "${DO_ABBABABA}" \
-rmTrans "${REMOVE_TRANS}" \
-blockSize "${BLOCKSIZE}" \
-doCounts "${DO_COUNTS}" \
Expand All @@ -77,12 +75,11 @@ then
-minQ "${MIN_BASEQUAL}" \
-minInd "${MIN_IND}" \
-nThreads "${N_CORES}" \
-out "${OUT}"/"${PROJECT}".D
-out "${OUT}"/"${PROJECT}".D)
# Assuming a single region was defined in config file
else
echo "Running Abbababa" >&2
"${ANGSD_DIR}"/angsd \
-doAbbababa "${DO_ABBABABA}" \
WRAPPER_ARGS=$(echo -doAbbababa "${DO_ABBABABA}" \
-rmTrans "${REMOVE_TRANS}" \
-blockSize "${BLOCKSIZE}" \
-doCounts "${DO_COUNTS}" \
Expand All @@ -94,8 +91,12 @@ else
-minInd "${MIN_IND}" \
-nThreads "${N_CORES}" \
-r "${REGIONS}" \
-out "${OUT}"/"${PROJECT}".D
-out "${OUT}"/"${PROJECT}".D)
fi
# Check for advanced arguments, and overwrite any overlapping definitions
FINAL_ARGS=$(source ${SOURCE}/Wrappers/Arg_Zipper.sh "${WRAPPER_ARGS}" "${ADVANCED_ARGS}")
# echo "Final arguments: ${FINAL_ARGS}" 1<&2
"${ANGSD_DIR}"/angsd ${FINAL_ARGS}

# jackKnife.R is provided with angsd.
echo "Using jackKnife.R to finish Abbababa" >&2
Expand Down
10 changes: 6 additions & 4 deletions Wrappers/Ancestral_Sequence.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,19 @@ ANGSD_DIR=${SOURCE}/dependencies/angsd
# Check to see if we're using -doCounts
if [[ -z "${DO_COUNTS}" ]]
then
"${ANGSD_DIR}"/angsd \
-doFasta "${DO_FASTA}" \
WRAPPER_ARGS=$(echo -doFasta "${DO_FASTA}" \
-i "${ANC_BAM}"\
-out "${OUT}"
else
"${ANGSD_DIR}"/angsd \
-doFasta "${DO_FASTA}" \
WRAPPER_ARGS=$(echo -doFasta "${DO_FASTA}" \
-doCounts "${DO_COUNTS}" \
-i "${ANC_BAM}"\
-out "${OUT}"
fi
# Check for advanced arguments, and overwrite any overlapping definitions
FINAL_ARGS=$(source ${SOURCE}/Wrappers/Arg_Zipper.sh "${WRAPPER_ARGS}" "${ADVANCED_ARGS}")
# echo "Final arguments: ${FINAL_ARGS}" 1<&2
"${ANGSD_DIR}"/angsd ${FINAL_ARGS}
# If we have SAMTools, might as well index
if `command -v samtools > /dev/null 2> /dev/null`
Expand Down
68 changes: 68 additions & 0 deletions Wrappers/Arg_Zipper.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/bin/bash

set -e
set -u

# Arg_Zipper.sh -- merge a wrapper's default arguments with an advanced user's overrides.
#
# Usage:
#   FINAL_ARGS=$(source Arg_Zipper.sh "${WRAPPER_ARGS}" "${ADVANCED_ARGS}")
#
# Arguments:
#   $1 - whitespace-separated '-flag value' pairs set by the wrapper (default: empty)
#   $2 - whitespace-separated '-flag value' pairs set by an advanced user (default: empty)
# Outputs:
#   stdout - the merged argument string; every token is followed by a single space
#   stderr - progress messages, one line per overridden flag, and warnings
#
# Limitation: both inputs are plain strings, so a value that itself contains
# whitespace cannot be represented and will be split into separate tokens.

#######################################
# Zip advanced flag/value pairs onto the wrapper's pairs.
# A flag that already exists has its value overwritten in place (every
# occurrence, if the wrapper listed it more than once); a new flag is appended.
# Arguments: $1 - wrapper argument string, $2 - advanced argument string
# Outputs:   merged argument string on stdout, diagnostics on stderr
#######################################
zip_args() {
    local wrapper_args="${1:-}"
    local adv_args="${2:-}"
    local -a base_tokens=() user_tokens=() flags=() vals=()
    local i j found merged=""

    # Tokenize each string once on any run of whitespace. The previous
    # 'cut -d " " -f N' approach returned the whole line for every N when the
    # string held no space (looping forever on a single token) and produced
    # empty fields on double spaces (silently truncating the argument list).
    # 'read -d ""' reads to end of input and therefore returns non-zero; the
    # array is still populated, hence the '|| true' under 'set -e'.
    IFS=$' \t\n' read -r -d '' -a base_tokens <<< "${wrapper_args}" || true
    IFS=$' \t\n' read -r -d '' -a user_tokens <<< "${adv_args}" || true

    # Wrapper defaults: consume tokens two at a time as flag/value.
    for (( i = 0; i + 1 < ${#base_tokens[@]}; i += 2 )); do
        flags+=("${base_tokens[i]}")
        vals+=("${base_tokens[i + 1]}")
    done
    # A left-over token has no partner; pass it through rather than drop it.
    if (( ${#base_tokens[@]} % 2 )); then
        echo "Warning: wrapper flag '${base_tokens[${#base_tokens[@]} - 1]}' has no value, passing it through as-is" 1>&2
        flags+=("${base_tokens[${#base_tokens[@]} - 1]}")
        vals+=("")
    fi

    # Advanced arguments: overwrite matching flags, append the rest.
    for (( i = 0; i + 1 < ${#user_tokens[@]}; i += 2 )); do
        found=false
        for (( j = 0; j < ${#flags[@]}; j++ )); do
            if [[ "${flags[j]}" == "${user_tokens[i]}" ]]; then
                echo "Found an overlapping flag '${user_tokens[i]}', overwriting value '${vals[j]}' with '${user_tokens[i + 1]}'" 1>&2
                vals[j]="${user_tokens[i + 1]}"
                found=true
            fi
        done
        if [[ "${found}" == false ]]; then
            flags+=("${user_tokens[i]}")
            vals+=("${user_tokens[i + 1]}")
        fi
    done
    if (( ${#user_tokens[@]} % 2 )); then
        echo "Warning: advanced flag '${user_tokens[${#user_tokens[@]} - 1]}' has no value, passing it through as-is" 1>&2
        flags+=("${user_tokens[${#user_tokens[@]} - 1]}")
        vals+=("")
    fi

    # Re-assemble in the original output format: "flag value flag value ".
    for (( i = 0; i < ${#flags[@]}; i++ )); do
        merged+="${flags[i]} "
        if [[ -n "${vals[i]}" ]]; then
            merged+="${vals[i]} "
        fi
    done

    printf '%s\n' "${merged}"
}

printf 'Zipping advanced arguments onto basic ones...\n\n' 1>&2
zip_args "${1:-}" "${2:-}"
39 changes: 21 additions & 18 deletions Wrappers/FST.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,7 @@ else
if [[ -f "${REGIONS}" ]]
then
echo "WRAPPER: $GROUP_1 sfs starting..." >&2
"${ANGSD_DIR}"/angsd \
-bam "${G1_SAMPLE_LIST}" \
WRAPPER_ARGS=$(echo -bam "${SAMPLE_LIST}" \
-out "${OUT}"/"${GROUP_1}"_Intergenic \
-doMajorMinor "${DO_MAJORMINOR}" \
-doMaf "${DO_MAF}" \
Expand All @@ -75,12 +74,11 @@ else
-GL "${GT_LIKELIHOOD}" \
-P "${N_CORES}" \
-rf "${REGIONS}" \
-doPost "${DO_POST}"
-doPost "${DO_POST}")
elif [[ -z "${REGIONS}" ]]
then
echo "WRAPPER: $GROUP_1 sfs starting" >&2
"${ANGSD_DIR}"/angsd \
-bam "${G1_SAMPLE_LIST}" \
WRAPPER_ARGS=$(echo -bam "${SAMPLE_LIST}" \
-out "${OUT}"/"${GROUP_1}"_Intergenic \
-doMajorMinor "${DO_MAJORMINOR}" \
-doMaf "${DO_MAF}" \
Expand All @@ -96,11 +94,10 @@ else
-ref "${REF_SEQ}" \
-GL "${GT_LIKELIHOOD}" \
-P "${N_CORES}" \
-doPost "${DO_POST}"
-doPost "${DO_POST}")
else
echo "WRAPPER: $GROUP_1 sfs starting" >&2
"${ANGSD_DIR}"/angsd \
-bam "${G1_SAMPLE_LIST}" \
WRAPPER_ARGS=$(echo -bam "${SAMPLE_LIST}" \
-out "${OUT}"/"${GROUP_1}"_Intergenic \
-doMajorMinor "${DO_MAJORMINOR}" \
-doMaf "${DO_MAF}" \
Expand All @@ -116,9 +113,14 @@ else
-ref "${REF_SEQ}" \
-GL "${GT_LIKELIHOOD}" \
-P "${N_CORES}" \
-r "${REGIONS}"
-r "${REGIONS}")
fi
fi
# Check for advanced arguments, and overwrite any overlapping definitions
FINAL_ARGS=$(source ${SOURCE}/Wrappers/Arg_Zipper.sh "${WRAPPER_ARGS}" "${ADVANCED_ARGS}")
# echo "Final arguments: ${FINAL_ARGS}" 1<&2
"${ANGSD_DIR}"/angsd ${FINAL_ARGS}


# For 2nd group:
if [[ -f "${OUT}"/"${GROUP_2}_Intergenic.saf" ]] && [ "$OVERRIDE" = "false" ]
Expand All @@ -129,8 +131,7 @@ else
if [[ -f "${REGIONS}" ]]
then
echo "WRAPPER: $GROUP_2 sfs starting..." >&2
"${ANGSD_DIR}"/angsd \
-bam "${G2_SAMPLE_LIST}" \
WRAPPER_ARGS=$(echo -bam "${SAMPLE_LIST}" \
-out "${OUT}"/"${GROUP_2}"_Intergenic \
-doMajorMinor "${DO_MAJORMINOR}" \
-doMaf "${DO_MAF}" \
Expand All @@ -147,13 +148,12 @@ else
-GL "${GT_LIKELIHOOD}" \
-P "${N_CORES}" \
-rf "${REGIONS}" \
-doPost "${DO_POST}"
-doPost "${DO_POST}")
# Are we missing a definition for regions?
elif [[ -z "${REGIONS}" ]]
then
echo "WRAPPER: $GROUP_2 sfs starting..." >&2
"${ANGSD_DIR}"/angsd \
-bam "${G2_SAMPLE_LIST}" \
WRAPPER_ARGS=$(echo -bam "${SAMPLE_LIST}" \
-out "${OUT}"/"${GROUP_2}"_Intergenic \
-doMajorMinor "${DO_MAJORMINOR}" \
-doMaf "${DO_MAF}" \
Expand All @@ -169,12 +169,11 @@ else
-ref "${REF_SEQ}" \
-GL "${GT_LIKELIHOOD}" \
-P "${N_CORES}" \
-doPost "${DO_POST}"
-doPost "${DO_POST}")
# Assuming a single region was defined in config file
else
echo "WRAPPER: $GROUP_2 sfs starting..." >&2
"${ANGSD_DIR}"/angsd \
-bam "${G2_SAMPLE_LIST}" \
WRAPPER_ARGS=$(echo -bam "${SAMPLE_LIST}" \
-out "${OUT}"/"${GROUP_2}"_Intergenic \
-doMajorMinor "${DO_MAJORMINOR}" \
-doMaf "${DO_MAF}" \
Expand All @@ -190,9 +189,13 @@ else
-ref "${REF_SEQ}" \
-GL "${GT_LIKELIHOOD}" \
-P "${N_CORES}" \
-r "${REGIONS}"
-r "${REGIONS}")
fi
fi
# Check for advanced arguments, and overwrite any overlapping definitions
FINAL_ARGS=$(source ${SOURCE}/Wrappers/Arg_Zipper.sh "${WRAPPER_ARGS}" "${ADVANCED_ARGS}")
echo "Final arguments: ${FINAL_ARGS}" 1<&2
"${ANGSD_DIR}"/angsd ${FINAL_ARGS}

# Estimate joint SFS using realSFS
echo "WRAPPER: realSFS 2dsfs..." >&2
Expand Down
21 changes: 11 additions & 10 deletions Wrappers/Genotypes.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ ANGSD_DIR="${SOURCE}"/dependencies/angsd
# Do we have a regions file?
if [[ -f "${REGIONS}" ]]
then
"${ANGSD_DIR}"/angsd \
-bam "${SAMPLE_LIST}" \
WRAPPER_ARGS=$(echo -bam "${SAMPLE_LIST}" \
-out "${OUT}"/"${PROJECT}"_snps \
-indF "${SAMPLE_INBREEDING}" \
-doMajorMinor "${DO_MAJORMINOR}" \
Expand All @@ -57,12 +56,11 @@ then
-SNP_pval "${SNP_PVAL}" \
-nInd "${N_IND}" \
-minInd "${MIN_IND}" \
-P "${N_CORES}"
-P "${N_CORES}")
# Are we missing a definition for regions?
elif [[ -z "${REGIONS}" ]]
then
"${ANGSD_DIR}"/angsd \
-bam "${SAMPLE_LIST}" \
WRAPPER_ARGS=$(echo -bam "${SAMPLE_LIST}" \
-out "${OUT}"/"${PROJECT}"_snps \
-indF "${SAMPLE_INBREEDING}" \
-doMajorMinor "${DO_MAJORMINOR}" \
Expand All @@ -78,11 +76,10 @@ then
-SNP_pval "${SNP_PVAL}" \
-nInd "${N_IND}" \
-minInd "${MIN_IND}" \
-P "${N_CORES}"
# Assuming a single reigon was defined in config file
-P "${N_CORES}")
# Assuming a single region was defined in config file
else
"${ANGSD_DIR}"/angsd \
-bam "${SAMPLE_LIST}" \
WRAPPER_ARGS=$(echo -bam "${SAMPLE_LIST}" \
-out "${OUT}"/"${PROJECT}"_snps \
-indF "${SAMPLE_INBREEDING}" \
-doMajorMinor "${DO_MAJORMINOR}" \
Expand All @@ -99,5 +96,9 @@ else
-SNP_pval "${SNP_PVAL}" \
-nInd "${N_IND}" \
-minInd "${MIN_IND}" \
-P "${N_CORES}"
-P "${N_CORES}")
fi
# Check for advanced arguments, and overwrite any overlapping definitions
FINAL_ARGS=$(source ${SOURCE}/Wrappers/Arg_Zipper.sh "${WRAPPER_ARGS}" "${ADVANCED_ARGS}")
# echo "Final arguments: ${FINAL_ARGS}" 1<&2
"${ANGSD_DIR}"/angsd ${FINAL_ARGS}
Loading

0 comments on commit c664bc7

Please sign in to comment.