diff --git a/CHANGELOG b/CHANGELOG index c6277f9..32c20dc 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,10 @@ +*********************************** +CHANGES IN VERSION 2.11.4 + +BUG FIXES + + o Fix major bug in parallel mode from 2.11.3 + *********************************** CHANGES IN VERSION 2.11.3 diff --git a/bin/HiC-Pro b/bin/HiC-Pro index 9dde368..66dd4d2 100755 --- a/bin/HiC-Pro +++ b/bin/HiC-Pro @@ -9,7 +9,7 @@ SOFT="HiC-Pro" -VERSION="2.11.3" +VERSION="2.11.4" function usage { echo -e "usage : $SOFT -i INPUT -o OUTPUT -c CONFIG [-s ANALYSIS_STEP] [-p] [-h] [-v]" @@ -248,9 +248,9 @@ fi ## Check rawdata structure if [[ $NEED_FASTQ == 1 ]]; then nbin=$(find -L $INPUT -mindepth 2 -maxdepth 2 -name "*.fastq" -o -name "*.fastq.gz" -o -name "*.fq" -o -name "*.fq.gz" | wc -l) #! - nbin_r1=$(find -L $INPUT -mindepth 2 -maxdepth 2 -name "*.fastq*" -o -name ".fq*" -and -name "*${PAIR1_EXT}*" | wc -l) - nbin_r2=$(find -L $INPUT -mindepth 2 -maxdepth 2 -name "*.fastq*" -o -name ".fq*" -and -name "*${PAIR2_EXT}*" | wc -l) - + nbin_r1=$(find -L $INPUT -mindepth 2 -maxdepth 2 -name "*.fastq*" -o -name "*.fq*" -and -name "*${PAIR1_EXT}*" | wc -l) + nbin_r2=$(find -L $INPUT -mindepth 2 -maxdepth 2 -name "*.fastq*" -o -name "*.fq*" -and -name "*${PAIR2_EXT}*" | wc -l) + if [ $nbin == 0 ]; then die "Error: Directory Hierarchy of rawdata '$INPUT' is not correct. No '.fastq(.gz)' files detected" fi diff --git a/scripts/bowtie_wrap.sh b/scripts/bowtie_wrap.sh index 0bc1621..62aa85f 100755 --- a/scripts/bowtie_wrap.sh +++ b/scripts/bowtie_wrap.sh @@ -51,7 +51,7 @@ end_to_end_align() else cmd=$cmd"> ${odir}/${prefix}_${REFERENCE_GENOME}.bwt2glob.bam" fi - + exec_cmd $cmd } diff --git a/scripts/hic.inc.sh b/scripts/hic.inc.sh index d5599ea..ca340a0 100755 --- a/scripts/hic.inc.sh +++ b/scripts/hic.inc.sh @@ -134,7 +134,7 @@ filter_pairs() get_data_type() { ## return the highest possible input files type - nb_fq=$(find -L $RAW_DIR -mindepth 2 -maxdepth 2 -name "*.fastq" -o -name "*.fastq.gz" -o -name ".fq" -o -name ".fq.gz"| wc -l) + nb_fq=$(find -L $RAW_DIR -mindepth 2 -maxdepth 2 -name "*.fastq" -o -name "*.fastq.gz" -o -name "*.fq" -o -name "*.fq.gz"| wc -l) nb_bam=$(find -L $RAW_DIR -mindepth 2 -maxdepth 2 -name "*.bam" -o -name "*.sam" | wc -l) nb_vpairs=$(find -L $RAW_DIR -mindepth 2 -maxdepth 2 -name "*.validPairs" | wc -l) nb_allvpairs=$(find -L $RAW_DIR -mindepth 2 -maxdepth 2 -name "*.allValidPairs" | wc -l) @@ -195,18 +195,45 @@ get_hic_files() if [ ! -z "$FASTQFILE" ]; then if [ ! -z "$TASKID" ]; then local input_data_type=$(get_data_type) - cat $FASTQFILE | filter_rawdir | filter_pairs | awk "NR == $TASKID {printf(\"%s/%s${ext}\n\", \"$idir\", gensub(\".${input_data_type}(.gz)*\", \"\", \$1));}" + ## deal with fq/fastq extension + if [ ${input_data_type} == "fastq" ]; then + pattern=".fastq(.gz)*$|.fq(.gz)*$" + else + pattern=".${input_data_type}$" + fi + ## raw data for mapping + if [[ $ext == ".fastq" || $ext == ".fq" ]]; then + cat $FASTQFILE | filter_rawdir | filter_pairs | awk "NR == $TASKID && \$1 ~ \"${ext}(.gz)*$\"{printf(\"%s/%s${ext}\n\", \"$idir\", gensub(\"${ext}(.gz)*$\", \"\", \$1));}" + else + cat $FASTQFILE | filter_rawdir | filter_pairs | awk "NR == $TASKID {printf(\"%s/%s${ext}\n\", \"$idir\", gensub(\"${pattern}\", \"\", \$1));}" + fi return fi local list= for fastq in $(cat $FASTQFILE | filter_rawdir ); do - get_hic_files_build_list + if [[ ${ext} == ".fastq" || ${ext} == ".fq" ]] + then + if [[ $fastq =~ "${ext}" ]] + then + get_hic_files_build_list + fi + else + get_hic_files_build_list + fi done echo "$list" | filter_pairs elif [ ! -z "$FASTQLIST" ]; then local list= for fastq in $(echo $FASTQLIST | filter_rawdir | sed -e 's/[,;]/ /g'); do - get_hic_files_build_list + if [[ ${ext} == ".fastq" || ${ext} == ".fq" ]] + then + if [[ $fastq =~ "${ext}" ]] + then + get_hic_files_build_list + fi + else + get_hic_files_build_list + fi done echo "$list" | filter_pairs else @@ -227,8 +254,8 @@ get_fastq_for_bowtie_global() then ifastq=$(get_hic_files $RAW_DIR .fastq | grep "$PAIR1_EXT") ifq=$(get_hic_files $RAW_DIR .fq | grep "$PAIR1_EXT") + echo "$ifastq $ifq" fi - echo "$ifastq $ifq" } get_fastq_for_bowtie_local() diff --git a/scripts/make_lsf_script.sh b/scripts/make_lsf_script.sh index b881af6..6f652ff 100755 --- a/scripts/make_lsf_script.sh +++ b/scripts/make_lsf_script.sh @@ -43,7 +43,9 @@ unset FASTQFILE if [[ $MAKE_OPTS == "" || $MAKE_OPTS == *"mapping"* ]] then inputfile=inputfiles_${JOB_NAME}.txt - get_hic_files $RAW_DIR .fastq | grep $PAIR1_EXT | sed -e "s|$RAW_DIR||" -e "s|^/||" > $inputfile + ifq=$(get_hic_files $RAW_DIR .fq) + ifastq=$(get_hic_files $RAW_DIR .fastq) + echo -e "$ifq\n$ifastq" | grep $PAIR1_EXT | sed -e "s|$RAW_DIR||" -e "s|^/||" > $inputfile count=$(cat $inputfile | wc -l) elif [[ $MAKE_OPTS == *"proc_hic"* ]] then diff --git a/scripts/make_sge_script.sh b/scripts/make_sge_script.sh index c1d888e..78a0204 100755 --- a/scripts/make_sge_script.sh +++ b/scripts/make_sge_script.sh @@ -41,7 +41,9 @@ unset FASTQFILE if [[ $MAKE_OPTS == "" || $MAKE_OPTS == *"mapping"* ]] then inputfile=inputfiles_${JOB_NAME}.txt - get_hic_files $RAW_DIR .fastq | grep $PAIR1_EXT | sed -e "s|$RAW_DIR||" -e "s|^/||" > $inputfile + ifq=$(get_hic_files $RAW_DIR .fq) + ifastq=$(get_hic_files $RAW_DIR .fastq) + echo -e "$ifq\n$ifastq" | grep $PAIR1_EXT | sed -e "s|$RAW_DIR||" -e "s|^/||" > $inputfile count=$(cat $inputfile | wc -l) elif [[ $MAKE_OPTS == *"proc_hic"* ]] then diff --git a/scripts/make_slurm_script.sh b/scripts/make_slurm_script.sh index 501e974..f0f1b33 100755 --- a/scripts/make_slurm_script.sh +++ b/scripts/make_slurm_script.sh @@ -41,7 +41,9 @@ unset FASTQFILE if [[ $MAKE_OPTS == "" || $MAKE_OPTS == *"mapping"* ]] then inputfile=inputfiles_${JOB_NAME}.txt - get_hic_files $RAW_DIR .fastq | grep $PAIR1_EXT | sed -e "s|$RAW_DIR||" -e "s|^/||" > $inputfile + ifq=$(get_hic_files $RAW_DIR .fq) + ifastq=$(get_hic_files $RAW_DIR .fastq) + echo -e "$ifq\n$ifastq" | grep $PAIR1_EXT | sed -e "s|$RAW_DIR||" -e "s|^/||" > $inputfile count=$(cat $inputfile | wc -l) elif [[ $MAKE_OPTS == *"proc_hic"* ]] then diff --git a/scripts/make_torque_script.sh b/scripts/make_torque_script.sh index 8b17950..80ca806 100755 --- a/scripts/make_torque_script.sh +++ b/scripts/make_torque_script.sh @@ -42,13 +42,9 @@ if [[ $MAKE_OPTS == "" || $MAKE_OPTS == *"mapping"* ]] then input_data_type=$(get_data_type) inputfile=inputfiles_${JOB_NAME}.txt - if [ $input_data_type == "fastq" ] - then - get_hic_files $RAW_DIR .fastq | grep $PAIR1_EXT | sed -e "s|$RAW_DIR||" -e "s|^/||" > $inputfile - elif [ $input_data_type == "fq" ] - then - get_hic_files $RAW_DIR .fq | grep $PAIR1_EXT | sed -e "s|$RAW_DIR||" -e "s|^/||" > $inputfile - fi + ifq=$(get_hic_files $RAW_DIR .fq) + ifastq=$(get_hic_files $RAW_DIR .fastq) + echo -e "$ifq\n$ifastq" | grep $PAIR1_EXT | sed -e "s|$RAW_DIR||" -e "s|^/||" > $inputfile count=$(cat $inputfile | wc -l) elif [[ $MAKE_OPTS == *"proc_hic"* ]] then