From 33d1ca88a9fea572d3cf9119adc4e07289bc6eae Mon Sep 17 00:00:00 2001
From: Marcele Laux <marcelelaux@gmail.com>
Date: Tue, 21 Feb 2023 11:16:04 -0300
Subject: [PATCH 01/13] Delete merge_pear_auto.sh

---
 1_merge/merge_pear_auto.sh | 72 --------------------------------------
 1 file changed, 72 deletions(-)
 delete mode 100644 1_merge/merge_pear_auto.sh

diff --git a/1_merge/merge_pear_auto.sh b/1_merge/merge_pear_auto.sh
deleted file mode 100644
index cbca7d3..0000000
--- a/1_merge/merge_pear_auto.sh
+++ /dev/null
@@ -1,72 +0,0 @@
-#/bin/bash
-module --quiet purge
-module load StdEnv
-module load PEAR/0.9.11-GCCcore-9.3.0
-
-INPUT_F=''
-INPUT_R=''
-THREADS='1'
-PVALUE='1'
-OVERLAP='1'
-OUTPUT=''
-UNCALLED='0'
-QUAL='20'
-
-
-usage () {
-        echo "##################################################"
-        echo " "
-	echo "Merging of paired end fastq files from Illumina sequencing using Pear."
-	echo " "
-        echo "Usage: ${0} [-f R1.fq] [-r R2.fq] [-o output] [-p 0.001] [-s 20] [-t 4]"
-        echo "-f     R1.fastq original file"
-        echo "-r     R2.fastq original file"
-        echo "-o     output name for the assembled fastq file"
-        echo "-p     p-value: statistical test for true assembly. Lower p-value means less possibility of overlapping by chance. Options are: 0.0001, 0.001, 0.01, 0.05 and 1.0"
-        echo "-s     minimum overlap size."
-        echo "-t     threads"
-        echo "-h     print this help"
-        echo " "
-        echo "##################################################"
-                1>&2; exit 1;
-
-}
-
-while getopts "f:r:o:p:s:t:h" option; do
-        case $option in
-        f) INPUT_F="${OPTARG}"
-                ;;
-        r) INPUT_R="${OPTARG}"
-                ;;
-        o) OUTPUT="${OPTARG}"
-                ;;
-        p) PVALUE="${OPTARG}"
-               ;;
-        s) OVERLAP="${OPTARG}"
-               ;;
-	t) THREADS="${OPTARG}"
-               ;;
-        h | *) usage
-                exit 0
-                ;;
-        \?) echo "Invalid option: -$OPTARG"
-                exit 1
-                ;;
-   esac
-done
-
-if [ -z "$INPUT_F" ] || [ -z "$INPUT_R" ] || [ -z "$OUTPUT" ] ; then
-        echo 'Missing argument' >&2
-        exit 1
-fi
-
-	pear -j ${THREADS}	\
-		 -p ${PVALUE}	\
-		 -v ${OVERLAP}	\
-		 -q ${QUAL}	\
-		 -u ${UNCALLED}	\
-		 -f ${INPUT_F}	\
-		 -r ${INPUT_R}	\
-		 -o ${OUTPUT} 
-
-

From 3ec7277f6c6d1e387fbc7205df7947c89ce32a5e Mon Sep 17 00:00:00 2001
From: Marcele Laux <marcelelaux@gmail.com>
Date: Tue, 21 Feb 2023 11:17:28 -0300
Subject: [PATCH 02/13] Updated executable

type ./1_merge_pear.sh -h
---
 1_merge/1_merge_pear.sh | 73 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 1_merge/1_merge_pear.sh

diff --git a/1_merge/1_merge_pear.sh b/1_merge/1_merge_pear.sh
new file mode 100644
index 0000000..ff88f4c
--- /dev/null
+++ b/1_merge/1_merge_pear.sh
@@ -0,0 +1,73 @@
+#/bin/bash
+
+INPUT_F=''
+INPUT_R=''
+THREADS='1'
+PVALUE='1'
+OVERLAP='1'
+OUTPUT=''
+UNCALLED='0'
+QUAL='20'
+
+
+usage () {
+        echo "##################################################"
+        echo " "
+	echo "Merging of paired end fastq files from Illumina sequencing using Pear."
+	echo " "
+        echo "Usage: ${0} [-f R1.fq] [-r R2.fq] [-o output] [-p 0.001] [-s 20] [-t 4]"
+        echo "-f     R1.fastq original file"
+        echo "-r     R2.fastq original file"
+        echo "-o     output name for the assembled fastq file"
+        echo "-p     p-value: statistical test for true assembly. Lower p-value means less possibility of overlapping by chance. Options are: 0.0001, 0.001, 0.01, 0.05 and 1.0"
+        echo "-s     minimum overlap size."
+        echo "-t     threads"
+        echo "-h     print this help"
+        echo " "
+        echo "##################################################"
+               2>/dev/null; exit 1; 
+
+}
+
+while getopts "f:r:o:p:s:t:h" option; do
+        case $option in
+        f) INPUT_F="${OPTARG}"
+                ;;
+        r) INPUT_R="${OPTARG}"
+                ;;
+        o) OUTPUT="${OPTARG}"
+                ;;
+        p) PVALUE="${OPTARG}"
+               ;;
+        s) OVERLAP="${OPTARG}"
+               ;;
+	t) THREADS="${OPTARG}"
+               ;;
+        h | *) usage
+                2>/dev/null; exit 0
+                ;;
+        \?) echo "Invalid option: -$OPTARG"
+                2>/dev/null; exit 1
+                ;;
+   esac
+done
+
+if [ -z "$INPUT_F" ] || [ -z "$INPUT_R" ] || [ -z "$OUTPUT" ] ; then
+        echo 'Missing argument' 2>/dev/null
+        exit 1
+fi
+
+module --quiet purge
+module load StdEnv
+module load PEAR/0.9.11-GCCcore-9.3.0
+
+pear -j ${THREADS}	\
+		 -p ${PVALUE}	\
+		 -v ${OVERLAP}	\
+		 -q ${QUAL}	\
+		 -u ${UNCALLED}	\
+		 -f ${INPUT_F}	\
+		 -r ${INPUT_R}	\
+		 -o ${OUTPUT} 
+
+

From aab00b2da8f277d2b6d8f52e9d82633906a3db9c Mon Sep 17 00:00:00 2001
From: Marcele Laux <marcelelaux@gmail.com>
Date: Tue, 21 Feb 2023 11:26:49 -0300
Subject: [PATCH 03/13] Update run_merge_pear.slurm

---
 1_merge/run_merge_pear.slurm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/1_merge/run_merge_pear.slurm b/1_merge/run_merge_pear.slurm
index 10ae6b5..78ff266 100644
--- a/1_merge/run_merge_pear.slurm
+++ b/1_merge/run_merge_pear.slurm
@@ -18,6 +18,6 @@ set -o nounset
 
 ./merge_pear.sh
 #or
-#./merge_pear_auto.sh -f R1.fq -r R2.fq -o output -p 0.001 -s 20 -t 8
+#./1_merge_pear.sh [-f R1.fq] [-r R2.fq] [-o output] [-p 0.001] [-s 20] [-t 8]
 
 exit 0

From e8d7307a147948cb271d0c58293692ea2473b8da Mon Sep 17 00:00:00 2001
From: Marcele Laux <marcelelaux@gmail.com>
Date: Tue, 21 Feb 2023 11:28:07 -0300
Subject: [PATCH 04/13] Update README.merge

---
 1_merge/README.merge | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/1_merge/README.merge b/1_merge/README.merge
index ca48133..8aab94f 100644
--- a/1_merge/README.merge
+++ b/1_merge/README.merge
@@ -1,6 +1,8 @@
-./merge_pear_auto.sh -h
- 
-Usage: ./merge_pear_auto.sh [-f R1.fq] [-r R2.fq] [-o output] [-p 0.001] [-s 20] [-t 4]
+./1_merge_pear.sh -h
+##################################################
+Merging of paired end fastq files from Illumina sequencing using Pear.
+
+Usage: ./1_merge_pear.sh [-f R1.fq] [-r R2.fq] [-o output] [-p 0.001] [-s 20] [-t 4]
 -f     R1.fastq original file
 -r     R2.fastq original file
 -o     output name for the assembled fastq file
@@ -8,8 +10,8 @@ Usage: ./merge_pear_auto.sh [-f R1.fq] [-r R2.fq] [-o output] [-p 0.001] [-s 20]
 -s     minimum overlap size.
 -t     threads
 -h     print this help
-
 ##################################################
+
 MERGING STEP -> PEAR 
 IF YOU WOULD LIKE TO EDIT THE SCRIPT BY YOURSELF:
 1. Open the merging_pear.sh and replace the name of your original fastq files R1 and R2

From ee2b0ec668e14079c8d6ffc407926137d4ebe52a Mon Sep 17 00:00:00 2001
From: Marcele Laux <marcelelaux@gmail.com>
Date: Tue, 21 Feb 2023 11:32:02 -0300
Subject: [PATCH 05/13] updated executables

---
 2_demulti/2a_preparing_tags_LCPI.pl        | 252 +++++++++++++++++++++
 2_demulti/2b_demulti_dual_index_ionboth.sh |  45 ++++
 2_demulti/2b_demulti_dual_index_linked.sh  |  45 ++++
 3 files changed, 342 insertions(+)
 create mode 100644 2_demulti/2a_preparing_tags_LCPI.pl
 create mode 100644 2_demulti/2b_demulti_dual_index_ionboth.sh
 create mode 100644 2_demulti/2b_demulti_dual_index_linked.sh

diff --git a/2_demulti/2a_preparing_tags_LCPI.pl b/2_demulti/2a_preparing_tags_LCPI.pl
new file mode 100644
index 0000000..1b06b8c
--- /dev/null
+++ b/2_demulti/2a_preparing_tags_LCPI.pl
@@ -0,0 +1,252 @@
+#!/usr/bin/perl
+use diagnostics;
+use warnings;
+use strict;
+package ECHO_MODULE;
+package main;
+
+my $file = $ARGV[0];
+my $subname = $ARGV[1];
+open (MYFILE, $file);
+my @file = <MYFILE>;
+close (MYFILE);
+
+if ($subname eq 'linked') {
+	&linked;
+}
+if ($subname eq 'combinatorial')	{
+	&combinatorial;
+}
+if ($subname eq 'unique')    {
+        &unique;
+}
+if ($subname eq 'ion')       {
+        &ion;
+}
+if ($subname eq 'ion3')       {
+        &ion3;
+}
+if ($subname eq 'ion5')       {
+        &ion5;
+}
+elsif ($subname eq 'ionboth')	{
+	&ionboth;
+}
+
+sub linked
+{
+open (NEW_FILE1, '>>Tags_LA1.fa');
+open (NEW_FILE2, '>>Tags_LA2.fa');
+open (NEW_FILE3, '>>Tags_LA3.fa');
+open (NEW_FILE4, '>>Tags_LA4.fa');
+my @new_file1=();
+my @new_file2=();
+my @new_file3=();
+my @new_file4=();
+        foreach my $line (@file) {
+			chomp ($line);
+			$line =~ s/\R//g;
+                        my @fields= split(/\t/, $line);
+			my $sample=$fields[0];
+			my $tag_F=$fields[1];
+			my $tag_R=$fields[2];
+			my $RCtagF=reverse $tag_F;
+			$RCtagF =~ tr/ATGCatgc/TACGtacg/;
+                        $RCtagF =~ s/[^\p{PosixAlnum},]//g;
+			my $RCtagR=reverse $tag_R;
+			$RCtagR =~ tr/ATGCatgc/TACGtacg/; 
+			$RCtagR =~ s/[^\p{PosixAlnum},]//g;
+			push (@new_file1, (">$sample\n^$tag_F...$tag_R\$\n"));	
+			push (@new_file2, (">$sample\n^$RCtagR...$RCtagF\$\n"));
+			push (@new_file3, (">$sample\n^$tag_F...$RCtagR\$\n"));
+			push (@new_file4, (">$sample\n^$tag_R...$RCtagF\$\n"));
+			}
+        print NEW_FILE1 @new_file1;
+	print NEW_FILE2 @new_file2;
+	print NEW_FILE3 @new_file3;
+	print NEW_FILE4 @new_file4;
+}
+sub combinatorial
+{
+open (NEW_FILE1, '>>Tags_F.fa');
+open (NEW_FILE2, '>>Tags_R.fa');
+my @new_file1=();
+my @new_file2=();
+        foreach my $line (@file) {
+                        chomp ($line);
+                        $line =~ s/\R//g;
+			my @fields= split(/\t/, $line);
+                        my $sample=$fields[0];
+                        my $tag_F=$fields[1];
+                        my $tag_R=$fields[2];
+			push (@new_file1, (">$sample\n$tag_F\n"));
+                        push (@new_file2, (">$sample\n^$tag_R\n"));
+			}
+        print NEW_FILE1 @new_file1;
+        print NEW_FILE2 @new_file2;
+}
+sub unique
+{
+open (NEW_FILE1, '>>Tags_R1.fa');
+open (NEW_FILE2, '>>Tags_R2.fa');
+open (NEW_FILE3, '>>Tags_R1_RC.fa');
+open (NEW_FILE4, '>>Tags_R2_RC.fa');
+my @new_file1=();
+my @new_file2=();
+my @new_file3=();
+my @new_file4=();
+        foreach my $line (@file) {
+                        chomp ($line);
+                        my @fields= split(/\t/, $line);
+                        my $sample=$fields[0];
+                        my $tag_F=$fields[1];
+                        my $tag_R=$fields[2];
+                        my $RCtagF=reverse $tag_F;
+                        $RCtagF =~ tr/ATGCatgc/TACGtacg/;
+                        $RCtagF =~ s/[^\p{PosixAlnum},]//g;
+                        my $RCtagR=reverse $tag_R;
+                        $RCtagR =~ tr/ATGCatgc/TACGtacg/;
+                        $RCtagR =~ s/[^\p{PosixAlnum},]//g;
+                        push (@new_file1, (">$sample\n$tag_F\n"));
+                        push (@new_file2, (">$sample\n$tag_R\n"));
+                        push (@new_file3, (">$sample\n$RCtagF\n"));
+			push (@new_file4, (">$sample\n$RCtagR\n"));
+                        }
+        print NEW_FILE1 @new_file1;
+        print NEW_FILE2 @new_file2;
+        print NEW_FILE3 @new_file3;
+	print NEW_FILE4 @new_file4;
+}
+sub ion
+{
+open (ALT1, '>>Tags_alt1.fa');
+open (ALT2, '>>Tags_alt2.fa');
+open (ALT3, '>>Tags_alt3.fa');
+open (ALT4, '>>Tags_alt4.fa');
+my @tags_alt_1=();
+my @tags_alt_2=();
+my @tags_alt_3=();
+my @tags_alt_4=();
+	foreach my $line (@file) {
+                        my @fields= split(/\t/, $line);
+                        my $sample=$fields[0];
+                        my $tag_F=$fields[1];
+                        my $tag_R=$fields[2];
+			$tag_F =~ s/[^\p{PosixAlnum},]//g;
+			$tag_R =~ s/[^\p{PosixAlnum},]//g;
+			my $RCtagF=reverse $tag_F;
+                        $RCtagF =~ tr/ATGCatgc/TACGtacg/;
+                        $RCtagF =~ s/[^\p{PosixAlnum},]//g;
+                        my $RCtagR=reverse $tag_R;
+                        $RCtagR =~ tr/ATGCatgc/TACGtacg/;
+                        $RCtagR =~ s/[^\p{PosixAlnum},]//g;
+                        push (@tags_alt_1, (">$sample\n$tag_F...$RCtagR\n"));
+			push (@tags_alt_2, (">$sample\n$RCtagF...$tag_R\n"));
+			push (@tags_alt_3, (">$sample\n$tag_R...$RCtagF\n"));
+			push (@tags_alt_4, (">$sample\n$RCtagR...$tag_F\n"));
+                        }
+        print ALT1 @tags_alt_1;
+	print ALT2 @tags_alt_2;
+	print ALT3 @tags_alt_3;
+	print ALT4 @tags_alt_4;
+}
+sub ion3
+{
+open (ALT1, '>>Tags_alt1_3anch.fa');
+open (ALT2, '>>Tags_alt2_3anch.fa');
+open (ALT3, '>>Tags_alt3_3anch.fa');
+open (ALT4, '>>Tags_alt4_3anch.fa');
+my @tags_alt_1_3anch=();
+my @tags_alt_2_3anch=();
+my @tags_alt_3_3anch=();
+my @tags_alt_4_3anch=();
+        foreach my $line (@file) {
+                        my @fields= split(/\t/, $line);
+                        my $sample=$fields[0];
+                        my $tag_F=$fields[1];
+                        my $tag_R=$fields[2];
+                        $tag_F =~ s/[^\p{PosixAlnum},]//g;
+                        $tag_R =~ s/[^\p{PosixAlnum},]//g;
+                        my $RCtagF=reverse $tag_F;
+                        $RCtagF =~ tr/ATGCatgc/TACGtacg/;
+                        $RCtagF =~ s/[^\p{PosixAlnum},]//g;
+                        my $RCtagR=reverse $tag_R;
+                        $RCtagR =~ tr/ATGCatgc/TACGtacg/;
+                        $RCtagR =~ s/[^\p{PosixAlnum},]//g;
+                        push (@tags_alt_1_3anch, (">$sample\n$tag_F...$RCtagR\$\n"));
+                        push (@tags_alt_2_3anch, (">$sample\n$RCtagF...$tag_R\$\n"));
+                        push (@tags_alt_3_3anch, (">$sample\n$tag_R...$RCtagF\$\n"));
+                        push (@tags_alt_4_3anch, (">$sample\n$RCtagR...$tag_F\$\n"));
+                        }
+        print ALT1 @tags_alt_1_3anch;
+        print ALT2 @tags_alt_2_3anch;
+        print ALT3 @tags_alt_3_3anch;
+        print ALT4 @tags_alt_4_3anch;
+}
+sub ion5
+{
+open (ALT1, '>>Tags_alt1_5anch.fa');
+open (ALT2, '>>Tags_alt2_5anch.fa');
+open (ALT3, '>>Tags_alt3_5anch.fa');
+open (ALT4, '>>Tags_alt4_5anch.fa');
+my @tags_alt_1_5anch=();
+my @tags_alt_2_5anch=();
+my @tags_alt_3_5anch=();
+my @tags_alt_4_5anch=();
+        foreach my $line (@file) {
+                        my @fields= split(/\t/, $line);
+                        my $sample=$fields[0];
+                        my $tag_F=$fields[1];
+                        my $tag_R=$fields[2];
+                        $tag_F =~ s/[^\p{PosixAlnum},]//g;
+                        $tag_R =~ s/[^\p{PosixAlnum},]//g;
+                        my $RCtagF=reverse $tag_F;
+                        $RCtagF =~ tr/ATGCatgc/TACGtacg/;
+                        $RCtagF =~ s/[^\p{PosixAlnum},]//g;
+                        my $RCtagR=reverse $tag_R;
+                        $RCtagR =~ tr/ATGCatgc/TACGtacg/;
+                        $RCtagR =~ s/[^\p{PosixAlnum},]//g;
+                        push (@tags_alt_1_5anch, (">$sample\n^$tag_F...$RCtagR\n"));
+                        push (@tags_alt_2_5anch, (">$sample\n^$RCtagF...$tag_R\n"));
+                        push (@tags_alt_3_5anch, (">$sample\n^$tag_R...$RCtagF\n"));
+                        push (@tags_alt_4_5anch, (">$sample\n^$RCtagR...$tag_F\n"));
+                        }
+        print ALT1 @tags_alt_1_5anch;
+        print ALT2 @tags_alt_2_5anch;
+        print ALT3 @tags_alt_3_5anch;
+        print ALT4 @tags_alt_4_5anch;
+}
+sub ionboth
+{
+open (ALT1, '>>Tags_alt1_bothanch.fa');
+open (ALT2, '>>Tags_alt2_bothanch.fa');
+open (ALT3, '>>Tags_alt3_bothanch.fa');
+open (ALT4, '>>Tags_alt4_bothanch.fa');
+my @tags_alt_1_bothanch=();
+my @tags_alt_2_bothanch=();
+my @tags_alt_3_bothanch=();
+my @tags_alt_4_bothanch=();
+        foreach my $line (@file) {
+                        my @fields= split(/\t/, $line);
+                        my $sample=$fields[0];
+                        my $tag_F=$fields[1];
+                        my $tag_R=$fields[2];
+                        $tag_F =~ s/[^\p{PosixAlnum},]//g;
+                        $tag_R =~ s/[^\p{PosixAlnum},]//g;
+                        my $RCtagF=reverse $tag_F;
+                        $RCtagF =~ tr/ATGCatgc/TACGtacg/;
+                        $RCtagF =~ s/[^\p{PosixAlnum},]//g;
+                        my $RCtagR=reverse $tag_R;
+                        $RCtagR =~ tr/ATGCatgc/TACGtacg/;
+                        $RCtagR =~ s/[^\p{PosixAlnum},]//g;
+                        push (@tags_alt_1_bothanch, (">$sample\n^$tag_F...$RCtagR\$\n"));
+                        push (@tags_alt_2_bothanch, (">$sample\n^$RCtagF...$tag_R\$\n"));
+                        push (@tags_alt_3_bothanch, (">$sample\n^$tag_R...$RCtagF\$\n"));
+                        push (@tags_alt_4_bothanch, (">$sample\n^$RCtagR...$tag_F\$\n"));
+                        }
+        print ALT1 @tags_alt_1_bothanch;
+        print ALT2 @tags_alt_2_bothanch;
+        print ALT3 @tags_alt_3_bothanch;
+        print ALT4 @tags_alt_4_bothanch;
+}
+
diff --git a/2_demulti/2b_demulti_dual_index_ionboth.sh b/2_demulti/2b_demulti_dual_index_ionboth.sh
new file mode 100644
index 0000000..1d2d76d
--- /dev/null
+++ b/2_demulti/2b_demulti_dual_index_ionboth.sh
@@ -0,0 +1,45 @@
+#/bin/bash
+##RUN preparing_tags_LCPI.pl to format your barcodes files.
+#input mapping file format:
+#Sample1    tagF      tagR
+#Sample2  ACCTGAAT  ATACAGA
+####tab delimited!
+#check this mapping file for duplicates in excel before sending to cluster
+#write sample names without space, e.g sample 23 as sample_23 or sample23.
+####DO NOT USE NUMBERS in the beginning of your sample names
+#perl preparing_tags_LCPI.pl
+                #my_mapping_file.txt
+                                #linked
+#the perl script should create 3 barcode files, Barcodes_LA1.txt, Barcodes_LA2.txt, Barcodes_LA3.txt for 'linked'
+#the linked mode is 5' and 3' anchored
+
+#ANY CUTADAPT ISSUE OR DOUBTS, SEE: https://cutadapt.readthedocs.io/en/stable/guide.html
+
+module --quiet purge
+module load StdEnv
+module load cutadapt/2.10-GCCcore-9.3.0-Python-3.8.2
+
+INPUT="${1}"
+ERR="${2}"
+PAIR1="Tags_alt1_bothanch.fa"
+PAIR2="Tags_alt1_bothanch.fa"
+PAIR3="Tags_alt1_bothanch.fa"
+PAIR4="Tags_alt1_bothanch.fa"
+
+### demultiplex (Linked Adapter)
+
+cutadapt	\
+	--quiet	\
+        -a file:${PAIR1}        \
+        -a file:${PAIR2}        \
+        -a file:${PAIR3}        \
+	-a file:${PAIR4}        \
+        -o "{name}_Ion_LA.fq"  \
+	-e ${ERR}	\
+        --action=lowercase      \
+        ${INPUT}
+		
+mkdir demulti_ionboth_${ERR}err		
+mv *.fq demulti_ionboth_${ERR}err
+./count_fastq_sequences.sh demulti_ionboth_${ERR}err/*.fq > demulti_ionboth_${ERR}err_count.txt
+
diff --git a/2_demulti/2b_demulti_dual_index_linked.sh b/2_demulti/2b_demulti_dual_index_linked.sh
new file mode 100644
index 0000000..87cd9fb
--- /dev/null
+++ b/2_demulti/2b_demulti_dual_index_linked.sh
@@ -0,0 +1,45 @@
+#/bin/bash
+##RUN preparing_tags_LCPI.pl to format your barcodes files.
+#input mapping file format:
+#Sample1    tagF      tagR
+#Sample2  ACCTGAAT  ATACAGA
+####tab delimited!
+#check this mapping file for duplicates in excel before sending to cluster
+#write sample names without space, e.g sample 23 as sample_23 or sample23.
+####DO NOT USE NUMBERS in the beginning of your sample names
+#perl preparing_tags_LCPI.pl
+                #my_mapping_file.txt
+                                #linked
+#the perl script should create 3 barcode files, Barcodes_LA1.txt, Barcodes_LA2.txt, Barcodes_LA3.txt for 'linked'
+#the linked mode is 5' and 3' anchored
+
+#ANY CUTADAPT ISSUE OR DOUBTS, SEE: https://cutadapt.readthedocs.io/en/stable/guide.html
+
+module --quiet purge
+module load StdEnv
+module load cutadapt/2.10-GCCcore-9.3.0-Python-3.8.2
+
+INPUT="${1}"
+ERR="${2}"
+PAIR1="Tags_LA1.fa"
+PAIR2="Tags_LA2.fa"
+PAIR3="Tags_LA3.fa"
+PAIR4="Tags_LA4.fa"
+
+### demultiplex (Linked Adapter)
+
+cutadapt	\
+	--quiet	\
+        -a file:${PAIR1}        \
+        -a file:${PAIR2}        \
+        -a file:${PAIR3}        \
+	-a file:${PAIR4}        \
+        -o "{name}_LA.fq"  \
+	-e ${ERR}	\
+        --action=lowercase      \
+        ${INPUT}
+		
+mkdir demulti_linked_${ERR}err		
+mv *.fq demulti_linked_${ERR}err
+./count_fastq_sequences.sh demulti_linked_${ERR}err/*.fq > demulti_linked_${ERR}err_count.txt
+

From 2ef1081f90e28d8172a14c95b5e3e867662b92c4 Mon Sep 17 00:00:00 2001
From: Marcele Laux <marcelelaux@gmail.com>
Date: Tue, 21 Feb 2023 11:33:25 -0300
Subject: [PATCH 06/13] Delete demulti_dual_index_linked.sh

---
 2_demulti/demulti_dual_index_linked.sh | 40 --------------------------
 1 file changed, 40 deletions(-)
 delete mode 100644 2_demulti/demulti_dual_index_linked.sh

diff --git a/2_demulti/demulti_dual_index_linked.sh b/2_demulti/demulti_dual_index_linked.sh
deleted file mode 100644
index 5683a3a..0000000
--- a/2_demulti/demulti_dual_index_linked.sh
+++ /dev/null
@@ -1,40 +0,0 @@
-#/bin/bash
-##FOR DUAL INDEX DESIGN, YOU MUST RUN preparing_tags_LCPI.pl to format your barcodes files.
-#input mapping file format:
-#Sample1    tagF      tagR
-#Sample2  ACCTGAAT  ATACAGA
-#tab delimited!
-#check this mapping file for duplicates in excel before sending to cluster
-#write sample names without space, e.g SamPLe 23 is not allowed, but SamPLe_23 is.
-#do not use numbers in the beginning of your sample names
-#perl preparing_tags_LCPI.pl
-                #my_mapping_file.txt
-                                #linked
-#the perl script should create 3 barcode files, Barcodes_LA1.txt, Barcodes_LA2.txt, Barcodes_LA3.txt for 'linked'
-#the linked mode is 5' and 2' anchored by default
-
-#ANY CUTADAPT ISSUE OR DOUBTS, SEE: https://cutadapt.readthedocs.io/en/stable/guide.html
-
-
-module --quiet purge
-module load StdEnv
-module load cutadapt/2.10-GCCcore-9.3.0-Python-3.8.2
-
-
-INPUT="my_training_set.assembled.fastq"
-PAIR1="Barcodes_LA1.fa"
-PAIR2="Barcodes_LA2.fa"
-PAIR3="Barcodes_LA3.fa"
-
-### demultiplex (Linked Adapter)
-
-cutadapt	\
-        -a file:${PAIR1}        \
-        -a file:${PAIR2}        \
-        -a file:${PAIR3}        \
-        -o "{name}_LA.fq"  \
-        --action=lowercase      \
-        ${INPUT}
-		
-mkdir demulti_linked		
-mv *.fq demulti_linked

From ae4bdda3842202f63ad666bc68fdcb0b0a9cd67c Mon Sep 17 00:00:00 2001
From: Marcele Laux <marcelelaux@gmail.com>
Date: Tue, 21 Feb 2023 11:33:47 -0300
Subject: [PATCH 07/13] Delete demulti_dual_index_ion.sh

---
 2_demulti/demulti_dual_index_ion.sh | 34 -----------------------------
 1 file changed, 34 deletions(-)
 delete mode 100644 2_demulti/demulti_dual_index_ion.sh

diff --git a/2_demulti/demulti_dual_index_ion.sh b/2_demulti/demulti_dual_index_ion.sh
deleted file mode 100644
index 1b79661..0000000
--- a/2_demulti/demulti_dual_index_ion.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#/bin/bash
-
-##FOR ION DUAL INDEX DESIGN, YOU MUST RUN preparing_tags_LCPI.pl to format your barcodes files.
-#input mapping file format tab delimited:
-#Sample1    tagF      tagR
-#Sample2  ACCTGAAT  ATACAGA
-#check this mapping file for duplicates in excel before sending to cluster
-#write sample names without space, e.g SamPLe 23 is not allowed, but SamPLe_23 is.
-#do not use numbers in the beginning of your sample names
-#perl preparing_tags_LCPI.pl
-		#my_mapping_file.txt 
-				#ion
-#the perl script should create 4 files Barcodes_alt1.fa to Barcodes_alt4.fa 
-
-
-INPUT="my_ion_dual_index_single_read.fastq"
-PAIR1="Barcodes_alt1.fa"
-PAIR2="Barcodes_alt2.fa"
-PAIR3="Barcodes_alt3.fa"
-PAIR4="Barcodes_alt4.fa"
-### demultiplex (Linked Adapter)
-
-cutadapt	\
-        -a file:${PAIR1}        \
-        -a file:${PAIR2}        \
-        -a file:${PAIR3}        \
-	-a file:${PAIR4}	\ 
-        -o "{name}_ion_LA.fq"  \
-        --action=lowercase      \
-        ${INPUT}
-		
-		
-mkdir demulti_iondual
-mv *.fq demulti_iondual

From 8633daa60b30fce6439c698db2ee90bce52fef04 Mon Sep 17 00:00:00 2001
From: Marcele Laux <marcelelaux@gmail.com>
Date: Tue, 21 Feb 2023 11:34:05 -0300
Subject: [PATCH 08/13] Delete demulti_unique_dual_index.sh

---
 2_demulti/demulti_unique_dual_index.sh | 36 --------------------------
 1 file changed, 36 deletions(-)
 delete mode 100644 2_demulti/demulti_unique_dual_index.sh

diff --git a/2_demulti/demulti_unique_dual_index.sh b/2_demulti/demulti_unique_dual_index.sh
deleted file mode 100644
index 30b456d..0000000
--- a/2_demulti/demulti_unique_dual_index.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#/bin/bash
-
-###DEMULTIPLEXING ILLUMINA DUAL INDEXED EXACT PAIRS
-###THIS STRATEGY IS EXCLUSIVE FOR PRIMER DESIGN UNIQUE, NO INDEX IS SHARED, ALL SAMPLES INDEPENDENT
-##The preparing_tags_LCPI.pl only format your barcodes files as two forward fasta files and two reverse complement fasta files:
-#input mapping file format tab delimited:
-#Sample1    tagF      tagR
-#Sample2  ACCTGAAT  ATACAGA
-#output:
-#Barcode_R1.fa     Barcode_R2.fa     Barcode_R1_RC.fa     Barcode_R2_RC.fa
-#>Sample1          >Sample1          >Sample1             >Sample1
-#ACCTGAAT          ACCTACAG          ATTCAGGT             CTGTAGGT   
-
-#check the mapping file for duplicates in excel before sending to server
-#write sample names without space, e.g SamPLe 23 is not allowed, but SamPLe_23 is.
-#do not use numbers in the beginning of your sample names
-#perl preparing_tags_LCPI.pl
-                #my_mapping_file.txt
-                                #unique
-#ANY CUTADAPT ISSUE OR DOUBTS, SEE: 
-#https://cutadapt.readthedocs.io/en/stable/guide.html
-#https://support.illumina.com/bulletins/2018/08/understanding-unique-dual-indexes--udi--and-associated-library-p.html
-
-INPUT_F="My_project_my_marker_R1.fastq"
-INPUT_R="My_project_my_marker_R2.fastq"
-R1_TAGS="Barcode_R1.fa"
-R2_TAGS="Barcode_R2.fa"
-R1_TAGS_RC="Barcode_R1_RC.fa"
-R2_TAGS_RC="Barcode_R2_RC.fa"
-
-cutadapt --pair-adapters -a file:${R1_TAGS} -a file:${R2_TAGS_RC} -G file:${R2_TAGS} -G file:${R1_TAGS_RC} -o {name}.1.fq -p {name}.2.fq ${INPUT_R1} ${INPUT_R2} --info-file matches.txt
-
-
-mkdir demulti_samples
-mv *.fq demulti_samples
-

From 1c3f479cc199c392060ec79a57c8dced74651aa3 Mon Sep 17 00:00:00 2001
From: Marcele Laux <marcelelaux@gmail.com>
Date: Tue, 21 Feb 2023 11:34:16 -0300
Subject: [PATCH 09/13] Delete preparing_tags_LCPI.pl

---
 2_demulti/preparing_tags_LCPI.pl | 253 -------------------------------
 1 file changed, 253 deletions(-)
 delete mode 100644 2_demulti/preparing_tags_LCPI.pl

diff --git a/2_demulti/preparing_tags_LCPI.pl b/2_demulti/preparing_tags_LCPI.pl
deleted file mode 100644
index 2b7236a..0000000
--- a/2_demulti/preparing_tags_LCPI.pl
+++ /dev/null
@@ -1,253 +0,0 @@
-#!/usr/bin/perl
-use diagnostics;
-use warnings;
-use strict;
-package ECHO_MODULE;
-package main;
-
-print "#####DEMULTIPLEXING DUAL INDEXED LIBRARIES#####\n";
-print "EXPECTED INPUT:\nsample1\ttagF\ttagR\nsample2\ttagF\ttagR\n...\t#same as in your excel file\n##DO NOT INCLUDE PRIMERS NOW\n\n";
-print "Please, enter your mapping file to edit:\t";
-my $arq1 = <STDIN>;
-chomp $arq1;
-open (MYFILE, $arq1);
-my @file = <MYFILE>;
-close (MYFILE);
-
-print ">>>For Illumina merged reads type 'linked'\n>>>For Illumina combinatorial type 'combinatorial'\n>>>For Illumina exact paired dual index type 'unique'\n>>>For Ion torrent dual index type 'ion'\n>>>For Ion dual index 3' anchored 'ion3'\n>>>For Ion dual index 5' anchored 'ion5'\n>>>For Ion dual index both anchored 'ion-both'\t";
-chop (my $subname = <STDIN>);
-if ($subname eq 'linked') {
-	&linked;
-}
-if ($subname eq 'combinatorial')	{
-	&combinatorial;
-}
-if ($subname eq 'unique')    {
-        &unique;
-}
-if ($subname eq 'ion')       {
-        &ion;
-}
-if ($subname eq 'ion3')       {
-        &ion3;
-}
-if ($subname eq 'ion5')       {
-        &ion5;
-}
-elsif ($subname eq 'ion-both')	{
-	&ionboth;
-}
-
-sub linked
-{
-open (NEW_FILE1, '>>Barcodes_LA1.fa');
-open (NEW_FILE2, '>>Barcodes_LA2.fa');
-open (NEW_FILE3, '>>Barcodes_LA3.fa');
-my @new_file1=();
-my @new_file2=();
-my @new_file3=();
-        foreach my $line (@file) {
-			chomp ($line);
-			$line =~ s/\R//g;
-                        my @fields= split(/\t/, $line);
-			my $sample=$fields[0];
-			my $tag_F=$fields[1];
-			my $tag_R=$fields[2];
-			my $RCtagF=reverse $tag_F;
-			$RCtagF =~ tr/ATGCatgc/TACGtacg/;
-                        $RCtagF =~ s/[^\p{PosixAlnum},]//g;
-			my $RCtagR=reverse $tag_R;
-			$RCtagR =~ tr/ATGCatgc/TACGtacg/; 
-			$RCtagR =~ s/[^\p{PosixAlnum},]//g;
-			push (@new_file1, (">$sample\n^$tag_F...$tag_R\$\n"));	
-			push (@new_file2, (">$sample\n^$RCtagR...$RCtagF\$\n"));
-			push (@new_file3, (">$sample\n^$tag_F...$RCtagR\$\n"));
-			}
-        print NEW_FILE1 @new_file1;
-	print NEW_FILE2 @new_file2;
-	print NEW_FILE3 @new_file3;
-}
-sub combinatorial
-{
-open (NEW_FILE1, '>>Barcodes_F.fa');
-open (NEW_FILE2, '>>Barcodes_R.fa');
-my @new_file1=();
-my @new_file2=();
-        foreach my $line (@file) {
-                        chomp ($line);
-                        $line =~ s/\R//g;
-			my @fields= split(/\t/, $line);
-                        my $sample=$fields[0];
-                        my $tag_F=$fields[1];
-                        my $tag_R=$fields[2];
-			push (@new_file1, (">$sample\n$tag_F\n"));
-                        push (@new_file2, (">$sample\n^$tag_R\n"));
-			}
-        print NEW_FILE1 @new_file1;
-        print NEW_FILE2 @new_file2;
-}
-sub unique
-{
-open (NEW_FILE1, '>>Barcode_R1.fa');
-open (NEW_FILE2, '>>Barcode_R2.fa');
-open (NEW_FILE3, '>>Barcode_R1_RC.fa');
-open (NEW_FILE4, '>>Barcode_R2_RC.fa');
-my @new_file1=();
-my @new_file2=();
-my @new_file3=();
-my @new_file4=();
-        foreach my $line (@file) {
-                        chomp ($line);
-                        my @fields= split(/\t/, $line);
-                        my $sample=$fields[0];
-                        my $tag_F=$fields[1];
-                        my $tag_R=$fields[2];
-                        my $RCtagF=reverse $tag_F;
-                        $RCtagF =~ tr/ATGCatgc/TACGtacg/;
-                        $RCtagF =~ s/[^\p{PosixAlnum},]//g;
-                        my $RCtagR=reverse $tag_R;
-                        $RCtagR =~ tr/ATGCatgc/TACGtacg/;
-                        $RCtagR =~ s/[^\p{PosixAlnum},]//g;
-                        push (@new_file1, (">$sample\n$tag_F\n"));
-                        push (@new_file2, (">$sample\n$tag_R\n"));
-                        push (@new_file3, (">$sample\n$RCtagF\n"));
-			push (@new_file4, (">$sample\n$RCtagR\n"));
-                        }
-        print NEW_FILE1 @new_file1;
-        print NEW_FILE2 @new_file2;
-        print NEW_FILE3 @new_file3;
-	print NEW_FILE4 @new_file4;
-}
-sub ion
-{
-open (ALT1, '>>Barcodes_alt1.fa');
-open (ALT2, '>>Barcodes_alt2.fa');
-open (ALT3, '>>Barcodes_alt3.fa');
-open (ALT4, '>>Barcodes_alt4.fa');
-my @tags_alt_1=();
-my @tags_alt_2=();
-my @tags_alt_3=();
-my @tags_alt_4=();
-	foreach my $line (@file) {
-                        my @fields= split(/\t/, $line);
-                        my $sample=$fields[0];
-                        my $tag_F=$fields[1];
-                        my $tag_R=$fields[2];
-			$tag_F =~ s/[^\p{PosixAlnum},]//g;
-			$tag_R =~ s/[^\p{PosixAlnum},]//g;
-			my $RCtagF=reverse $tag_F;
-                        $RCtagF =~ tr/ATGCatgc/TACGtacg/;
-                        $RCtagF =~ s/[^\p{PosixAlnum},]//g;
-                        my $RCtagR=reverse $tag_R;
-                        $RCtagR =~ tr/ATGCatgc/TACGtacg/;
-                        $RCtagR =~ s/[^\p{PosixAlnum},]//g;
-                        push (@tags_alt_1, (">$sample\n$tag_F...$RCtagR\n"));
-			push (@tags_alt_2, (">$sample\n$RCtagF...$tag_R\n"));
-			push (@tags_alt_3, (">$sample\n$tag_R...$RCtagF\n"));
-			push (@tags_alt_4, (">$sample\n$RCtagR...$tag_F\n"));
-                        }
-        print ALT1 @tags_alt_1;
-	print ALT2 @tags_alt_2;
-	print ALT3 @tags_alt_3;
-	print ALT4 @tags_alt_4;
-}
-sub ion3
-{
-open (ALT1, '>>Barcodes_alt1_3anch.fa');
-open (ALT2, '>>Barcodes_alt2_3anch.fa');
-open (ALT3, '>>Barcodes_alt3_3anch.fa');
-open (ALT4, '>>Barcodes_alt4_3anch.fa');
-my @tags_alt_1_3anch=();
-my @tags_alt_2_3anch=();
-my @tags_alt_3_3anch=();
-my @tags_alt_4_3anch=();
-        foreach my $line (@file) {
-                        my @fields= split(/\t/, $line);
-                        my $sample=$fields[0];
-                        my $tag_F=$fields[1];
-                        my $tag_R=$fields[2];
-                        $tag_F =~ s/[^\p{PosixAlnum},]//g;
-                        $tag_R =~ s/[^\p{PosixAlnum},]//g;
-                        my $RCtagF=reverse $tag_F;
-                        $RCtagF =~ tr/ATGCatgc/TACGtacg/;
-                        $RCtagF =~ s/[^\p{PosixAlnum},]//g;
-                        my $RCtagR=reverse $tag_R;
-                        $RCtagR =~ tr/ATGCatgc/TACGtacg/;
-                        $RCtagR =~ s/[^\p{PosixAlnum},]//g;
-                        push (@tags_alt_1_3anch, (">$sample\n$tag_F...$RCtagR\$\n"));
-                        push (@tags_alt_2_3anch, (">$sample\n$RCtagF...$tag_R\$\n"));
-                        push (@tags_alt_3_3anch, (">$sample\n$tag_R...$RCtagF\$\n"));
-                        push (@tags_alt_4_3anch, (">$sample\n$RCtagR...$tag_F\$\n"));
-                        }
-        print ALT1 @tags_alt_1_3anch;
-        print ALT2 @tags_alt_2_3anch;
-        print ALT3 @tags_alt_3_3anch;
-        print ALT4 @tags_alt_4_3anch;
-}
-sub ion5
-{
-open (ALT1, '>>Barcodes_alt1_5anch.fa');
-open (ALT2, '>>Barcodes_alt2_5anch.fa');
-open (ALT3, '>>Barcodes_alt3_5anch.fa');
-open (ALT4, '>>Barcodes_alt4_5anch.fa');
-my @tags_alt_1_5anch=();
-my @tags_alt_2_5anch=();
-my @tags_alt_3_5anch=();
-my @tags_alt_4_5anch=();
-        foreach my $line (@file) {
-                        my @fields= split(/\t/, $line);
-                        my $sample=$fields[0];
-                        my $tag_F=$fields[1];
-                        my $tag_R=$fields[2];
-                        $tag_F =~ s/[^\p{PosixAlnum},]//g;
-                        $tag_R =~ s/[^\p{PosixAlnum},]//g;
-                        my $RCtagF=reverse $tag_F;
-                        $RCtagF =~ tr/ATGCatgc/TACGtacg/;
-                        $RCtagF =~ s/[^\p{PosixAlnum},]//g;
-                        my $RCtagR=reverse $tag_R;
-                        $RCtagR =~ tr/ATGCatgc/TACGtacg/;
-                        $RCtagR =~ s/[^\p{PosixAlnum},]//g;
-                        push (@tags_alt_1_5anch, (">$sample\n^$tag_F...$RCtagR\n"));
-                        push (@tags_alt_2_5anch, (">$sample\n^$RCtagF...$tag_R\n"));
-                        push (@tags_alt_3_5anch, (">$sample\n^$tag_R...$RCtagF\n"));
-                        push (@tags_alt_4_5anch, (">$sample\n^$RCtagR...$tag_F\n"));
-                        }
-        print ALT1 @tags_alt_1_5anch;
-        print ALT2 @tags_alt_2_5anch;
-        print ALT3 @tags_alt_3_5anch;
-        print ALT4 @tags_alt_4_5anch;
-}
-sub ionboth
-{
-open (ALT1, '>>Barcodes_alt1_bothanch.fa');
-open (ALT2, '>>Barcodes_alt2_bothanch.fa');
-open (ALT3, '>>Barcodes_alt3_bothanch.fa');
-open (ALT4, '>>Barcodes_alt4_bothanch.fa');
-my @tags_alt_1_bothanch=();
-my @tags_alt_2_bothanch=();
-my @tags_alt_3_bothanch=();
-my @tags_alt_4_bothanch=();
-        foreach my $line (@file) {
-                        my @fields= split(/\t/, $line);
-                        my $sample=$fields[0];
-                        my $tag_F=$fields[1];
-                        my $tag_R=$fields[2];
-                        $tag_F =~ s/[^\p{PosixAlnum},]//g;
-                        $tag_R =~ s/[^\p{PosixAlnum},]//g;
-                        my $RCtagF=reverse $tag_F;
-                        $RCtagF =~ tr/ATGCatgc/TACGtacg/;
-                        $RCtagF =~ s/[^\p{PosixAlnum},]//g;
-                        my $RCtagR=reverse $tag_R;
-                        $RCtagR =~ tr/ATGCatgc/TACGtacg/;
-                        $RCtagR =~ s/[^\p{PosixAlnum},]//g;
-                        push (@tags_alt_1_bothanch, (">$sample\n^$tag_F...$RCtagR\$\n"));
-                        push (@tags_alt_2_bothanch, (">$sample\n^$RCtagF...$tag_R\$\n"));
-                        push (@tags_alt_3_bothanch, (">$sample\n^$tag_R...$RCtagF\$\n"));
-                        push (@tags_alt_4_bothanch, (">$sample\n^$RCtagR...$tag_F\$\n"));
-                        }
-        print ALT1 @tags_alt_1_bothanch;
-        print ALT2 @tags_alt_2_bothanch;
-        print ALT3 @tags_alt_3_bothanch;
-        print ALT4 @tags_alt_4_bothanch;
-}
-

From 96c846d9d32cdda99b45f656b0f3320965b9f752 Mon Sep 17 00:00:00 2001
From: Marcele Laux <marcelelaux@gmail.com>
Date: Tue, 21 Feb 2023 11:46:58 -0300
Subject: [PATCH 10/13] Update README.demulti

---
 2_demulti/README.demulti | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/2_demulti/README.demulti b/2_demulti/README.demulti
index d859a3c..746340e 100644
--- a/2_demulti/README.demulti
+++ b/2_demulti/README.demulti
@@ -1,3 +1,9 @@
+#########################################################################
+> perl 2a_preparing_tags_LCPI.pl my_mapping_file.txt linked
+Output:
+Tags_LA1.fa   Tags_LA2.fa   Tags_LA3.fa   Tags_LA4.fa
+> ./2b_demulti_dual_index_linked.sh my_data.assembled.fastq
+#########################################################################
 If you are not familiar with this step, please read all the orientation:
 For all demultiplexing strategies based on cutadapt the mapping file must be a tab separated text file like this:
 
@@ -6,21 +12,8 @@ sample1   	AGGTACGCAATT	CCTAAACTACGG
 sample2    	ACAGCCACCCAT	CCTAAACTACGG
 sample3    	TGTCTCGCAAGC	CCTAAACTACGG
 
-then we format it according to your tag primer design using the perl script preparing_tags_LCPI.pl. The dominant tag orientation for Illumina dual index merged pairs: (3'tagF)...(5'RCtagR)
-perl preparing_tags_LCPI.pl
-#####DEMULTIPLEXING DUAL INDEXED LIBRARIES#####
-EXPECTED INPUT:
-sample1 tagF    tagR
-sample2 tagF    tagR
-...     #same as in your excel file
-##DO NOT INCLUDE PRIMERS NOW
-
-Please, enter your mapping file to edit: my_mapping_file.txt
->>>For Illumina merged reads type 'linked'
->>>For Illumina combinatorial type 'combinatorial'
->>>For Illumina exact paired dual index type 'unique'
->>>For Ion torrent dual index type 'ion'
->>> type your mode
+then we format it according to your tag primer design using the perl script preparing_tags_LCPI.pl. 
+The dominant tag orientation for Illumina dual index merged pairs: (3'tagF)...(5'RCtagR)
 
 for merged R1-R2 Illumina files in linked mode, the tags ar formatted like this:
 head Barcodes_LA.fa

From 8c90d73322e9d6bf3e23b306b7a322b29e965852 Mon Sep 17 00:00:00 2001
From: Marcele Laux <marcelelaux@gmail.com>
Date: Tue, 21 Feb 2023 11:48:32 -0300
Subject: [PATCH 11/13] Update README.merge

---
 1_merge/README.merge | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/1_merge/README.merge b/1_merge/README.merge
index 8aab94f..b94c3f0 100644
--- a/1_merge/README.merge
+++ b/1_merge/README.merge
@@ -11,6 +11,8 @@ Usage: ./1_merge_pear.sh [-f R1.fq] [-r R2.fq] [-o output] [-p 0.001] [-s 20] [-
 -t     threads
 -h     print this help
 ##################################################
+Output:
+my_data.assembled.fastq
 
 MERGING STEP -> PEAR 
 IF YOU WOULD LIKE TO EDIT THE SCRIPT BY YOURSELF:

From aaac341c9c3a8aa9adba99053f83b0f1bd87ed45 Mon Sep 17 00:00:00 2001
From: Marcele Laux <marcelelaux@gmail.com>
Date: Tue, 21 Feb 2023 11:50:36 -0300
Subject: [PATCH 12/13] updated executable

./3_tag_primer_clipping_clean_derep.sh -h
---
 .../3_tag_primer_clipping_clean_derep.sh      | 117 ++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 3_read_cleaning/3_tag_primer_clipping_clean_derep.sh

diff --git a/3_read_cleaning/3_tag_primer_clipping_clean_derep.sh b/3_read_cleaning/3_tag_primer_clipping_clean_derep.sh
new file mode 100644
index 0000000..2798bb0
--- /dev/null
+++ b/3_read_cleaning/3_tag_primer_clipping_clean_derep.sh
@@ -0,0 +1,117 @@
+#/bin/bash
+
+TAGS=''
+PRIMER_F=''
+PRIMER_R=''
+PREFIX=''
+DIR=''
+PRIMER_F_RC=''
+PRIMER_R_RC=''
+MIN_F=$(( ${#PRIMER_F} * 2 / 3 ))
+MIN_R=$(( ${#PRIMER_R} * 2 / 3 ))
+MIN_LEN=''
+EE=''
+
+usage () {
+        echo "##################################################"
+        echo "TAGS AND PRIMERS CLIPPING"
+        echo "##################################################"
+	echo "get your primer's reverse complement in this website:"
+	echo "http://arep.med.harvard.edu/labgc/adnan/projects/Utilities/revcomp.html"
+	echo " "
+        echo "Usage: ${0} [-b tags file] [-F 5' primer 'Forward'] [-R 3' primer 'Reverse'] [-f revcom 5' primer] [-r revcom 3' primer] [-e 0.1] [-p prefix] [-l amplicon min length] [-d path to demultiplexed samples]"
+        echo "-b     barcodes/tags mapping file (used in demultiplexing step)"
+        echo "-F     5' primer (forward direction, like in your primers design file)"
+        echo "-R     3' primer (forward direction)"
+        echo "-f     5' primer reverse complement"
+        echo "-r     3' primer reverse complement"
+	echo "-e     maximum error rate (the same used for demultiplexing)"
+	echo "Error rate: allowed errors/adapter or primer length. 2 errors in a 12 bp tag = 2/12 = 0.17"
+	echo "4 errors in a 26bp degenerated primer = 4/26 = 0.16, 2 errors in a 20bp primer = 0.1 (Cutadapt default)."
+        echo "-p     PREFIX to quality file (it MUST be the SAME prefix throughout the ENTIRE PIPELINE)"
+        echo "-l     minimum length for the final amplicon"
+        echo "-d     path to demultiplexed samples directory. Use pwd."
+        echo "-h     print this help"
+        echo " "
+        echo "##################################################"
+                1>&2; exit 1;
+
+}
+while getopts "b:F:R:f:r:e:p:d:l:h" option; do
+        case $option in
+        b) TAGS="${OPTARG}"
+                ;;
+        F) PRIMER_F="${OPTARG}"
+                ;;
+        R) PRIMER_R="${OPTARG}"
+                ;;
+        f) PRIMER_F_RC="${OPTARG}"
+                ;;
+        r) PRIMER_R_RC="${OPTARG}"
+                ;;
+	e) EE="${OPTARG}"
+		;;
+        p) PREFIX="${OPTARG}"
+                ;;
+        d) DIR="${OPTARG}"
+                ;;
+        l) MIN_LEN="${OPTARG}"
+                ;;
+        h | *) usage
+                exit 0
+                ;;
+        \?) echo "Invalid option: -$OPTARG"
+                exit 1
+                ;;
+        : ) echo "Missing argument for -${OPTARG}" >&2
+                exit 1
+                ;;
+   esac
+done
+
+module --force purge
+module load StdEnv
+module load cutadapt/2.10-GCCcore-9.3.0-Python-3.8.2
+
+mkdir -p clip_out
+mkdir -p clip_out/tag_clip
+for file in "${DIR}"/*_LA.fq; do LA="$(basename $file _LA.fq)"; cutadapt -j 8 --quiet -e $EE -a file:Tags_LA1.fa -a file:Tags_LA2.fa -a file:Tags_LA3.fa --action=trim --trim-n --max-n 0 --minimum-length $MIN_LEN -o ${LA}_trim1.fq $file; done;
+
+for f in *_trim1.fq; do trim1="$(basename $f _trim1.fq)"; cutadapt -j 8 --quiet -e $EE -a "${PRIMER_F}...${PRIMER_R_RC}" -O $MIN_F -a "${PRIMER_R}...${PRIMER_F_RC}" -O $MIN_R --action=trim --minimum-length $MIN_LEN -o ${trim1}_trim2.fq $f; done;
+mv *_trim1.fq clip_out/tag_clip
+
+module --force purge
+module load StdEnv
+module load VSEARCH/2.13.4-iccifort-2019.1.144-GCC-8.2.0-2.31.1
+
+mkdir -p clip_out/primer_clip
+# Discard sequences containing Ns, add expected error rates and convert to fasta
+for j in *_trim2.fq; do trim2="$(basename $j _trim2.fq)"; vsearch --quiet --fastq_filter $j --relabel_sha1 --fastq_qmax 45 --eeout --fastq_maxns 0 --fastqout ${trim2}_trim3.fq; done;
+mv *_trim2.fq clip_out/primer_clip
+
+mkdir -p clip_out/trimming
+for k in *_trim3.fq; do trim3a="$(basename $k _trim3.fq)"; vsearch --quiet --fastq_filter $k --xee --fastaout ${trim3a}_trim3.fasta; done;
+
+#quality file
+# Discard quality lines, extract hash, expected error rates and read length
+QUALITY_FILE=${PREFIX}.qual
+TMP_QUAL=$(mktemp --tmpdir=".")
+for q in *_trim3.fq; do \
+        sed 'n;n;N;d' $q | \
+        awk 'BEGIN {FS = "[;=]"}
+           {if (/^@/) {printf "%s\t%s\t", $1, $3} else {print length($1)}}' | \
+        tr -d "@" >> "${TMP_QUAL}"
+done < "${TAGS}"
+# Produce the final quality file
+sort -k3,3n -k1,1d -k2,2n $TMP_QUAL  | \
+   uniq --check-chars=40 > "${QUALITY_FILE}"
+rm $TMP_QUAL
+mv *_trim3.fq clip_out/trimming
+
+mkdir dereplicated
+for p in *_trim3.fasta; do trim3b="$(basename $p _trim3.fasta)"; vsearch --quiet --derep_fulllength $p --sizeout --fasta_width 0 --output ${trim3b}_dp.fasta; done;
+rm *_trim3.fasta
+mv *_dp.fasta dereplicated
+
+
+

From 9f6a30f0d7e045ad6737d34b375650dd5bc25965 Mon Sep 17 00:00:00 2001
From: Marcele Laux <marcelelaux@gmail.com>
Date: Tue, 21 Feb 2023 11:51:19 -0300
Subject: [PATCH 13/13] updated executable

./3_tag_primer_clipping_clean_derep.sh -h
---
 .../3_tag_primer_clipping_clean_derep.sh      | 117 ++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 4_dereplicate/3_tag_primer_clipping_clean_derep.sh

diff --git a/4_dereplicate/3_tag_primer_clipping_clean_derep.sh b/4_dereplicate/3_tag_primer_clipping_clean_derep.sh
new file mode 100644
index 0000000..2798bb0
--- /dev/null
+++ b/4_dereplicate/3_tag_primer_clipping_clean_derep.sh
@@ -0,0 +1,117 @@
+#/bin/bash
+
+TAGS=''
+PRIMER_F=''
+PRIMER_R=''
+PREFIX=''
+DIR=''
+PRIMER_F_RC=''
+PRIMER_R_RC=''
+MIN_F=$(( ${#PRIMER_F} * 2 / 3 ))
+MIN_R=$(( ${#PRIMER_R} * 2 / 3 ))
+MIN_LEN=''
+EE=''
+
+usage () {
+        echo "##################################################"
+        echo "TAGS AND PRIMERS CLIPPING"
+        echo "##################################################"
+	echo "get your primer's reverse complement in this website:"
+	echo "http://arep.med.harvard.edu/labgc/adnan/projects/Utilities/revcomp.html"
+	echo " "
+        echo "Usage: ${0} [-b tags file] [-F 5' primer 'Forward'] [-R 3' primer 'Reverse'] [-f revcom 5' primer] [-r revcom 3' primer] [-e 0.1] [-p prefix] [-l amplicon min length] [-d path to demultiplexed samples]"
+        echo "-b     barcodes/tags mapping file (used in demultiplexing step)"
+        echo "-F     5' primer (forward direction, like in your primers design file)"
+        echo "-R     3' primer (forward direction)"
+        echo "-f     5' primer reverse complement"
+        echo "-r     3' primer reverse complement"
+	echo "-e     maximum error rate (the same used for demultiplexing)"
+	echo "Error rate: allowed errors/adapter or primer length. 2 errors in a 12 bp tag = 2/12 = 0.17"
+	echo "4 errors in a 26bp degenerated primer = 4/26 = 0.16, 2 errors in a 20bp primer = 0.1 (Cutadapt default)."
+        echo "-p     PREFIX to quality file (it MUST be the SAME prefix throughout the ENTIRE PIPELINE)"
+        echo "-l     minimum length for the final amplicon"
+        echo "-d     path to demultiplexed samples directory. Use pwd."
+        echo "-h     print this help"
+        echo " "
+        echo "##################################################"
+                1>&2; exit 1;
+
+}
+while getopts "b:F:R:f:r:e:p:d:l:h" option; do
+        case $option in
+        b) TAGS="${OPTARG}"
+                ;;
+        F) PRIMER_F="${OPTARG}"
+                ;;
+        R) PRIMER_R="${OPTARG}"
+                ;;
+        f) PRIMER_F_RC="${OPTARG}"
+                ;;
+        r) PRIMER_R_RC="${OPTARG}"
+                ;;
+	e) EE="${OPTARG}"
+		;;
+        p) PREFIX="${OPTARG}"
+                ;;
+        d) DIR="${OPTARG}"
+                ;;
+        l) MIN_LEN="${OPTARG}"
+                ;;
+        h | *) usage
+                exit 0
+                ;;
+        \?) echo "Invalid option: -$OPTARG"
+                exit 1
+                ;;
+        : ) echo "Missing argument for -${OPTARG}" >&2
+                exit 1
+                ;;
+   esac
+done
+
+module --force purge
+module load StdEnv
+module load cutadapt/2.10-GCCcore-9.3.0-Python-3.8.2
+
+mkdir -p clip_out
+mkdir -p clip_out/tag_clip
+for file in "${DIR}"/*_LA.fq; do LA="$(basename $file _LA.fq)"; cutadapt -j 8 --quiet -e $EE -a file:Tags_LA1.fa -a file:Tags_LA2.fa -a file:Tags_LA3.fa --action=trim --trim-n --max-n 0 --minimum-length $MIN_LEN -o ${LA}_trim1.fq $file; done;
+
+for f in *_trim1.fq; do trim1="$(basename $f _trim1.fq)"; cutadapt -j 8 --quiet -e $EE -a "${PRIMER_F}...${PRIMER_R_RC}" -O $MIN_F -a "${PRIMER_R}...${PRIMER_F_RC}" -O $MIN_R --action=trim --minimum-length $MIN_LEN -o ${trim1}_trim2.fq $f; done;
+mv *_trim1.fq clip_out/tag_clip
+
+module --force purge
+module load StdEnv
+module load VSEARCH/2.13.4-iccifort-2019.1.144-GCC-8.2.0-2.31.1
+
+mkdir -p clip_out/primer_clip
+# Discard sequences containing Ns, add expected error rates and convert to fasta
+for j in *_trim2.fq; do trim2="$(basename $j _trim2.fq)"; vsearch --quiet --fastq_filter $j --relabel_sha1 --fastq_qmax 45 --eeout --fastq_maxns 0 --fastqout ${trim2}_trim3.fq; done;
+mv *_trim2.fq clip_out/primer_clip
+
+mkdir -p clip_out/trimming
+for k in *_trim3.fq; do trim3a="$(basename $k _trim3.fq)"; vsearch --quiet --fastq_filter $k --xee --fastaout ${trim3a}_trim3.fasta; done;
+
+#quality file
+# Discard quality lines, extract hash, expected error rates and read length
+QUALITY_FILE=${PREFIX}.qual
+TMP_QUAL=$(mktemp --tmpdir=".")
+for q in *_trim3.fq; do \
+        sed 'n;n;N;d' $q | \
+        awk 'BEGIN {FS = "[;=]"}
+           {if (/^@/) {printf "%s\t%s\t", $1, $3} else {print length($1)}}' | \
+        tr -d "@" >> "${TMP_QUAL}"
+done < "${TAGS}"
+# Produce the final quality file
+sort -k3,3n -k1,1d -k2,2n $TMP_QUAL  | \
+   uniq --check-chars=40 > "${QUALITY_FILE}"
+rm $TMP_QUAL
+mv *_trim3.fq clip_out/trimming
+
+mkdir dereplicated
+for p in *_trim3.fasta; do trim3b="$(basename $p _trim3.fasta)"; vsearch --quiet --derep_fulllength $p --sizeout --fasta_width 0 --output ${trim3b}_dp.fasta; done;
+rm *_trim3.fasta
+mv *_dp.fasta dereplicated
+
+
+