marlaux · marlaux · Feb 21, 2023 · Feb 21, 2023 · Feb 21, 2023 · Feb 21, 2023
diff --git a/1_merge/merge_pear_auto.sh → 1_merge/1_merge_pear.sh b/1_merge/merge_pear_auto.sh → 1_merge/1_merge_pear.sh
@@ -1,7 +1,4 @@
 #/bin/bash
-module --quiet purge
-module load StdEnv
-module load PEAR/0.9.11-GCCcore-9.3.0
 
 INPUT_F=''
 INPUT_R=''
@@ -28,7 +25,7 @@ usage () {
         echo "-h     print this help"
         echo " "
         echo "##################################################"
-                1>&2; exit 1;
+               2>/dev/null; exit 1; 
 
 }
 
@@ -47,20 +44,24 @@ while getopts "f:r:o:p:s:t:h" option; do
 	t) THREADS="${OPTARG}"
                ;;
         h | *) usage
-                exit 0
+                2>/dev/null; exit 0
                 ;;
         \?) echo "Invalid option: -$OPTARG"
-                exit 1
+                2>/dev/null; exit 1
                 ;;
    esac
 done
 
 if [ -z "$INPUT_F" ] || [ -z "$INPUT_R" ] || [ -z "$OUTPUT" ] ; then
-        echo 'Missing argument' >&2
+        echo 'Missing argument' 2>/dev/null
         exit 1
 fi
 
-	pear -j ${THREADS}	\
+module --quiet purge
+module load StdEnv
+module load PEAR/0.9.11-GCCcore-9.3.0
+
+pear -j ${THREADS}	\
 		 -p ${PVALUE}	\
 		 -v ${OVERLAP}	\
 		 -q ${QUAL}	\

diff --git a/1_merge/README.merge b/1_merge/README.merge
@@ -1,15 +1,19 @@
-./merge_pear_auto.sh -h
-
-Usage: ./merge_pear_auto.sh [-f R1.fq] [-r R2.fq] [-o output] [-p 0.001] [-s 20] [-t 4]
+./1_merge_pear.sh -h
+##################################################
+Merging of paired end fastq files from Illumina sequencing using Pear.
+
+Usage: ./1_merge_pear.sh [-f R1.fq] [-r R2.fq] [-o output] [-p 0.001] [-s 20] [-t 4]
 -f     R1.fastq original file
 -r     R2.fastq original file
 -o     output name for the assembled fastq file
 -p     p-value: statistical test for true assembly. Lower p-value means less possibility of overlapping by chance. Options are: 0.0001, 0.001, 0.01, 0.05 and 1.0
 -s     minimum overlap size.
 -t     threads
 -h     print this help
-
 ##################################################
+Output:
+my_data.assembled.fastq
+
 MERGING STEP -> PEAR 
 IF YOU WOULD LIKE TO EDIT THE SCRIPT BY YOURSELF:
 1. Open the merging_pear.sh and replace the name of your original fastq files R1 and R2

diff --git a/1_merge/run_merge_pear.slurm b/1_merge/run_merge_pear.slurm
@@ -18,6 +18,6 @@ set -o nounset
 
 ./merge_pear.sh
 #or
-#./merge_pear_auto.sh -f R1.fq -r R2.fq -o output -p 0.001 -s 20 -t 8
+#./1_merge_pear.sh [-f R1.fq] [-r R2.fq] [-o output] [-p 0.001] [-s 20] [-t 8]
 
 exit 0
diff --git a/2_demulti/preparing_tags_LCPI.pl → 2_demulti/2a_preparing_tags_LCPI.pl b/2_demulti/preparing_tags_LCPI.pl → 2_demulti/2a_preparing_tags_LCPI.pl
@@ -5,17 +5,12 @@
 package ECHO_MODULE;
 package main;
 
-print "#####DEMULTIPLEXING DUAL INDEXED LIBRARIES#####\n";
-print "EXPECTED INPUT:\nsample1\ttagF\ttagR\nsample2\ttagF\ttagR\n...\t#same as in your excel file\n##DO NOT INCLUDE PRIMERS NOW\n\n";
-print "Please, enter your mapping file to edit:\t";
-my $arq1 = <STDIN>;
-chomp $arq1;
-open (MYFILE, $arq1);
+my $file = $ARGV[0];
+my $subname = $ARGV[1];
+open (MYFILE, $file);
 my @file = <MYFILE>;
 close (MYFILE);
 
-print ">>>For Illumina merged reads type 'linked'\n>>>For Illumina combinatorial type 'combinatorial'\n>>>For Illumina exact paired dual index type 'unique'\n>>>For Ion torrent dual index type 'ion'\n>>>For Ion dual index 3' anchored 'ion3'\n>>>For Ion dual index 5' anchored 'ion5'\n>>>For Ion dual index both anchored 'ion-both'\t";
-chop (my $subname = <STDIN>);
 if ($subname eq 'linked') {
 	&linked;
 }
@@ -34,18 +29,20 @@ package main;
 if ($subname eq 'ion5')       {
         &ion5;
 }
-elsif ($subname eq 'ion-both')	{
+elsif ($subname eq 'ionboth')	{
 	&ionboth;
 }
 
 sub linked
 {
-open (NEW_FILE1, '>>Barcodes_LA1.fa');
-open (NEW_FILE2, '>>Barcodes_LA2.fa');
-open (NEW_FILE3, '>>Barcodes_LA3.fa');
+open (NEW_FILE1, '>>Tags_LA1.fa');
+open (NEW_FILE2, '>>Tags_LA2.fa');
+open (NEW_FILE3, '>>Tags_LA3.fa');
+open (NEW_FILE4, '>>Tags_LA4.fa');
 my @new_file1=();
 my @new_file2=();
 my @new_file3=();
+my @new_file4=();
         foreach my $line (@file) {
 			chomp ($line);
 			$line =~ s/\R//g;
@@ -62,15 +59,17 @@ sub linked
 			push (@new_file1, (">$sample\n^$tag_F...$tag_R\$\n"));	
 			push (@new_file2, (">$sample\n^$RCtagR...$RCtagF\$\n"));
 			push (@new_file3, (">$sample\n^$tag_F...$RCtagR\$\n"));
+			push (@new_file4, (">$sample\n^$tag_R...$RCtagF\$\n"));
 			}
         print NEW_FILE1 @new_file1;
 	print NEW_FILE2 @new_file2;
 	print NEW_FILE3 @new_file3;
+	print NEW_FILE4 @new_file4;
 }
 sub combinatorial
 {
-open (NEW_FILE1, '>>Barcodes_F.fa');
-open (NEW_FILE2, '>>Barcodes_R.fa');
+open (NEW_FILE1, '>>Tags_F.fa');
+open (NEW_FILE2, '>>Tags_R.fa');
 my @new_file1=();
 my @new_file2=();
         foreach my $line (@file) {
@@ -88,10 +87,10 @@ sub combinatorial
 }
 sub unique
 {
-open (NEW_FILE1, '>>Barcode_R1.fa');
-open (NEW_FILE2, '>>Barcode_R2.fa');
-open (NEW_FILE3, '>>Barcode_R1_RC.fa');
-open (NEW_FILE4, '>>Barcode_R2_RC.fa');
+open (NEW_FILE1, '>>Tags_R1.fa');
+open (NEW_FILE2, '>>Tags_R2.fa');
+open (NEW_FILE3, '>>Tags_R1_RC.fa');
+open (NEW_FILE4, '>>Tags_R2_RC.fa');
 my @new_file1=();
 my @new_file2=();
 my @new_file3=();
@@ -120,10 +119,10 @@ sub unique
 }
 sub ion
 {
-open (ALT1, '>>Barcodes_alt1.fa');
-open (ALT2, '>>Barcodes_alt2.fa');
-open (ALT3, '>>Barcodes_alt3.fa');
-open (ALT4, '>>Barcodes_alt4.fa');
+open (ALT1, '>>Tags_alt1.fa');
+open (ALT2, '>>Tags_alt2.fa');
+open (ALT3, '>>Tags_alt3.fa');
+open (ALT4, '>>Tags_alt4.fa');
 my @tags_alt_1=();
 my @tags_alt_2=();
 my @tags_alt_3=();
@@ -153,10 +152,10 @@ sub ion
 }
 sub ion3
 {
-open (ALT1, '>>Barcodes_alt1_3anch.fa');
-open (ALT2, '>>Barcodes_alt2_3anch.fa');
-open (ALT3, '>>Barcodes_alt3_3anch.fa');
-open (ALT4, '>>Barcodes_alt4_3anch.fa');
+open (ALT1, '>>Tags_alt1_3anch.fa');
+open (ALT2, '>>Tags_alt2_3anch.fa');
+open (ALT3, '>>Tags_alt3_3anch.fa');
+open (ALT4, '>>Tags_alt4_3anch.fa');
 my @tags_alt_1_3anch=();
 my @tags_alt_2_3anch=();
 my @tags_alt_3_3anch=();
@@ -186,10 +185,10 @@ sub ion3
 }
 sub ion5
 {
-open (ALT1, '>>Barcodes_alt1_5anch.fa');
-open (ALT2, '>>Barcodes_alt2_5anch.fa');
-open (ALT3, '>>Barcodes_alt3_5anch.fa');
-open (ALT4, '>>Barcodes_alt4_5anch.fa');
+open (ALT1, '>>Tags_alt1_5anch.fa');
+open (ALT2, '>>Tags_alt2_5anch.fa');
+open (ALT3, '>>Tags_alt3_5anch.fa');
+open (ALT4, '>>Tags_alt4_5anch.fa');
 my @tags_alt_1_5anch=();
 my @tags_alt_2_5anch=();
 my @tags_alt_3_5anch=();
@@ -219,10 +218,10 @@ sub ion5
 }
 sub ionboth
 {
-open (ALT1, '>>Barcodes_alt1_bothanch.fa');
-open (ALT2, '>>Barcodes_alt2_bothanch.fa');
-open (ALT3, '>>Barcodes_alt3_bothanch.fa');
-open (ALT4, '>>Barcodes_alt4_bothanch.fa');
+open (ALT1, '>>Tags_alt1_bothanch.fa');
+open (ALT2, '>>Tags_alt2_bothanch.fa');
+open (ALT3, '>>Tags_alt3_bothanch.fa');
+open (ALT4, '>>Tags_alt4_bothanch.fa');
 my @tags_alt_1_bothanch=();
 my @tags_alt_2_bothanch=();
 my @tags_alt_3_bothanch=();

diff --git a/2_demulti/2b_demulti_dual_index_ionboth.sh b/2_demulti/2b_demulti_dual_index_ionboth.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+##RUN 2a_preparing_tags_LCPI.pl first to format your tag files.
+#input mapping file format:
+#Sample1    tagF      tagR
+#Sample2  ACCTGAAT  ATACAGA
+####tab delimited!
+#check this mapping file for duplicates in excel before sending to cluster
+#write sample names without spaces, e.g. write 'sample 23' as sample_23 or sample23.
+####DO NOT USE NUMBERS in the beginning of your sample names
+#perl 2a_preparing_tags_LCPI.pl my_mapping_file.txt ionboth
+#then: ./2b_demulti_dual_index_ionboth.sh <input.fastq> <error_rate>
+#  $1 = fastq file to demultiplex; $2 = value given to cutadapt -e (also used in the output directory name)
+#the perl script should create 4 tag files for 'ionboth': Tags_alt1_bothanch.fa ... Tags_alt4_bothanch.fa
+#the ionboth mode is anchored at both ends (5' and 3')
+
+#ANY CUTADAPT ISSUE OR DOUBTS, SEE: https://cutadapt.readthedocs.io/en/stable/guide.html
+
+module --quiet purge
+module load StdEnv
+module load cutadapt/2.10-GCCcore-9.3.0-Python-3.8.2
+
+INPUT="${1}"
+ERR="${2}"
+PAIR1="Tags_alt1_bothanch.fa"
+PAIR2="Tags_alt2_bothanch.fa"
+PAIR3="Tags_alt3_bothanch.fa"
+PAIR4="Tags_alt4_bothanch.fa"
+
+### demultiplex (Linked Adapter) -- one output file per tag name, all four tag orientations are searched
+
+cutadapt	\
+	--quiet	\
+        -a "file:${PAIR1}"      \
+        -a "file:${PAIR2}"      \
+        -a "file:${PAIR3}"      \
+	-a "file:${PAIR4}"      \
+        -o "{name}_Ion_LA.fq"  \
+	-e "${ERR}"	\
+        --action=lowercase      \
+        "${INPUT}"
+
+mkdir "demulti_ionboth_${ERR}err"
+mv *.fq "demulti_ionboth_${ERR}err"
+./count_fastq_sequences.sh "demulti_ionboth_${ERR}err"/*.fq > "demulti_ionboth_${ERR}err_count.txt"
+
diff --git a/2_demulti/demulti_dual_index_linked.sh → 2_demulti/2b_demulti_dual_index_linked.sh b/2_demulti/demulti_dual_index_linked.sh → 2_demulti/2b_demulti_dual_index_linked.sh
@@ -1,40 +1,45 @@
 #/bin/bash
-##FOR DUAL INDEX DESIGN, YOU MUST RUN preparing_tags_LCPI.pl to format your barcodes files.
+##RUN 2a_preparing_tags_LCPI.pl first to format your tag files.
 #input mapping file format:
 #Sample1    tagF      tagR
 #Sample2  ACCTGAAT  ATACAGA
-#tab delimited!
+####tab delimited!
 #check this mapping file for duplicates in excel before sending to cluster
-#write sample names without space, e.g SamPLe 23 is not allowed, but SamPLe_23 is.
-#do not use numbers in the beginning of your sample names
+#write sample names without spaces, e.g. write 'sample 23' as sample_23 or sample23.
+####DO NOT USE NUMBERS in the beginning of your sample names
 #perl preparing_tags_LCPI.pl
                 #my_mapping_file.txt
                                 #linked
 #the perl script should create 3 barcode files, Barcodes_LA1.txt, Barcodes_LA2.txt, Barcodes_LA3.txt for 'linked'
-#the linked mode is 5' and 2' anchored by default
+#the linked mode is 5' and 3' anchored
 
 #ANY CUTADAPT ISSUE OR DOUBTS, SEE: https://cutadapt.readthedocs.io/en/stable/guide.html
 
-
 module --quiet purge
 module load StdEnv
 module load cutadapt/2.10-GCCcore-9.3.0-Python-3.8.2
 
-
-INPUT="my_training_set.assembled.fastq"
-PAIR1="Barcodes_LA1.fa"
-PAIR2="Barcodes_LA2.fa"
-PAIR3="Barcodes_LA3.fa"
+INPUT="${1}"
+ERR="${2}"
+PAIR1="Tags_LA1.fa"
+PAIR2="Tags_LA2.fa"
+PAIR3="Tags_LA3.fa"
+PAIR4="Tags_LA4.fa"
 
 ### demultiplex (Linked Adapter)
 
 cutadapt	\
+	--quiet	\
         -a file:${PAIR1}        \
         -a file:${PAIR2}        \
         -a file:${PAIR3}        \
+	-a file:${PAIR4}        \
         -o "{name}_LA.fq"  \
+	-e ${ERR}	\
         --action=lowercase      \
         ${INPUT}
 
-mkdir demulti_linked		
-mv *.fq demulti_linked
+mkdir demulti_linked_${ERR}err		
+mv *.fq demulti_linked_${ERR}err
+./count_fastq_sequences.sh demulti_linked_${ERR}err/*.fq > demulti_linked_${ERR}err_count.txt
+
diff --git a/2_demulti/README.demulti b/2_demulti/README.demulti
@@ -1,3 +1,9 @@
+#########################################################################
+> perl 2a_preparing_tags_LCPI.pl my_mapping_file.txt linked
+Output:
+Tags_LA1.fa   Tags_LA2.fa   Tags_LA3.fa   Tags_LA4.fa
+> ./2b_demulti_dual_index_linked.sh my_data.assembled.fastq <error_rate>
+#########################################################################
 If you are not familiar with this step, please read all the orientation:
 For all demultiplexing strategies based on cutadapt the mapping file must be a tab separated text file like this:
 
@@ -6,21 +12,8 @@ sample1   	AGGTACGCAATT	CCTAAACTACGG
 sample2    	ACAGCCACCCAT	CCTAAACTACGG
 sample3    	TGTCTCGCAAGC	CCTAAACTACGG
 
-then we format it according to your tag primer design using the perl script preparing_tags_LCPI.pl. The dominant tag orientation for Illumina dual index merged pairs: (3'tagF)...(5'RCtagR)
-perl preparing_tags_LCPI.pl
-#####DEMULTIPLEXING DUAL INDEXED LIBRARIES#####
-EXPECTED INPUT:
-sample1 tagF    tagR
-sample2 tagF    tagR
-...     #same as in your excel file
-##DO NOT INCLUDE PRIMERS NOW
-
-Please, enter your mapping file to edit: my_mapping_file.txt
->>>For Illumina merged reads type 'linked'
->>>For Illumina combinatorial type 'combinatorial'
->>>For Illumina exact paired dual index type 'unique'
->>>For Ion torrent dual index type 'ion'
->>> type your mode
+Then we format it according to your tag primer design using the perl script 2a_preparing_tags_LCPI.pl.
+The dominant tag orientation for Illumina dual-index merged pairs is: (3'tagF)...(5'RCtagR)
 
 for merged R1-R2 Illumina files in linked mode, the tags ar formatted like this:
 head Barcodes_LA.fa

diff --git a/2_demulti/demulti_dual_index_ion.sh b/2_demulti/demulti_dual_index_ion.sh