diff --git a/.github/workflows/dockerhub_push_release.yml b/.github/workflows/dockerhub_push_release.yml deleted file mode 100644 index e8b6638..0000000 --- a/.github/workflows/dockerhub_push_release.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: deploy release -# This builds the docker image and pushes it to DockerHub -on: - release: - types: [published] -jobs: - push_dockerhub: - name: Push new Docker image to Docker Hub (release) - runs-on: ubuntu-latest - # Only run for the official repo, for releases and merged PRs - if: ${{ github.repository == 'BU-ISCIII/taranis' }} - env: - DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} - DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASSWORD }} - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Build new docker image - run: docker build --no-cache . -t buisciii/taranis:${{ github.event.release.tag_name }} - - - name: Push Docker image to DockerHub (develop) - run: | - echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin - docker push buisciii/taranis:${{ github.event.release.tag_name }} diff --git a/.github/workflows/python_lint.yml b/.github/workflows/python_lint.yml new file mode 100644 index 0000000..9d043bb --- /dev/null +++ b/.github/workflows/python_lint.yml @@ -0,0 +1,35 @@ +name: python_lint + +on: + push: + paths: + - '**.py' + pull_request: + paths: + - '**.py' + +jobs: + flake8_py3: + runs-on: ubuntu-latest + steps: + - name: Setup Python + uses: actions/setup-python@v1 + with: + python-version: 3.9.x + architecture: x64 + - name: Checkout repository + uses: actions/checkout@master + - name: Install flake8 + run: pip install flake8 + - name: Run flake8 + run: flake8 --ignore E501,W503,E203,W605 + + black_lint: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v2 + - name: Install black with Jupyter support + run: pip install "black[jupyter]" + - name: Check code lints with Black + uses: psf/black@stable diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml deleted file mode 100644 index ed66541..0000000 --- a/.github/workflows/tests.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: tests ci -# This workflow runs the pipeline with the minimal test dataset to check that it completes any errors -on: - push: - branches: [develop] - pull_request_target: - branches: [develop] - release: - types: [published] - -jobs: - push_dockerhub: - name: Push new Docker image to Docker Hub (dev) - runs-on: ubuntu-latest - # Only run for the official repo, for releases and merged PRs - if: ${{ github.repository == 'BU-ISCIII/taranis' }} - env: - DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} - DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASSWORD }} - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Build new docker image - run: docker build --no-cache .
-t buisciii/taranis:dev - - name: Push Docker image to DockerHub (develop) - run: | - echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin - docker push buisciii/taranis:dev - run-tests: - name: Run tests - needs: push_dockerhub - runs-on: ubuntu-latest - steps: - - name: Run pipeline with test data - run: | - docker run buisciii/taranis:dev bash -c /opt/taranis/test/test.sh diff --git a/setup.py b/setup.py index 4b6fad4..eda9691 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages -version = "2.2.0" +version = "3.0.0" with open("README.md") as f: readme = f.read() diff --git a/taranis/__main__.py b/taranis/__main__.py index d08cb47..a281777 100644 --- a/taranis/__main__.py +++ b/taranis/__main__.py @@ -52,7 +52,7 @@ def run_taranis(): ) # stderr.print("[green] `._,._,'\n", highlight=False) - __version__ = "2.1.0" + __version__ = "3.0.0" stderr.print( "\n" "[grey39] Taranis version {}".format(__version__), highlight=False ) @@ -166,6 +166,12 @@ def taranis_cli(verbose, log_file): default=False, help="Remove no CDS alleles from the schema.", ) +@click.option( + "--output-allele-annot/--no-output-allele-annot", + required=False, + default=True, + help="Get extended annotation for all alleles in locus", +) @click.option( "--genus", required=False, @@ -184,29 +190,41 @@ def taranis_cli(verbose, log_file): default="Genus", help="Use genus-specific BLAST databases for Prokka schema genes annotation (needs --genus). Default is False.", ) +@click.option( + "--cpus", + required=False, + multiple=False, + type=int, + default=1, + help="Number of CPUs used for execution", +) def analyze_schema( inputdir, output, remove_subset, remove_duplicated, remove_no_cds, + output_allele_annot, genus, species, usegenus, + cpus, ): schema_files = taranis.utils.get_files_in_folder(inputdir, "fasta") """ - schema_analyze = {} for schema_file in schema_files: schema_obj = taranis.analyze_schema.AnalyzeSchema(schema_file, output, remove_subset, remove_duplicated, remove_no_cds, genus, species, usegenus) - schema_analyze.update(schema_obj.analyze_allele_in_schema()) - - """ + schema_analyze = [] + schema_analyze.append(schema_obj.analyze_allele_in_schema()) + import pdb; pdb.set_trace() + _ = taranis.analyze_schema.collect_statistics(schema_analyze, output, output_allele_annot) + sys.exit(0) # for schema_file in schema_files: + """ results = [] start = time.perf_counter() - with concurrent.futures.ProcessPoolExecutor() as executor: + with concurrent.futures.ProcessPoolExecutor(max_workers=cpus) as executor: futures = [ executor.submit( taranis.analyze_schema.parallel_execution, @@ -224,10 +242,11 @@ def analyze_schema( # Collect results as they complete for future in concurrent.futures.as_completed(futures): results.append(future.result()) - _ = taranis.analyze_schema.collect_statistics(results, output) + _ = taranis.analyze_schema.collect_statistics(results, output, output_allele_annot) finish = time.perf_counter() print(f"Schema analyze finish in {round((finish-start)/60, 2)} minutes") + # Reference alleles @taranis_cli.command(help_priority=2) @click.option(
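The new `--cpus` option is threaded through to `ProcessPoolExecutor(max_workers=cpus)`, bounding how many worker processes the per-locus schema analysis may use. A minimal, self-contained sketch of the same submit/collect pattern, assuming a placeholder worker (`analyze_locus` and `run_parallel` are illustrative names, not the real `taranis.analyze_schema.parallel_execution` signature):

```python
import concurrent.futures


def analyze_locus(schema_file: str) -> dict:
    # Stand-in for the per-locus work each worker process performs.
    return {"locus": schema_file}


def run_parallel(schema_files: list, cpus: int = 1) -> list:
    results = []
    # max_workers=cpus caps the pool size, mirroring the new --cpus option.
    with concurrent.futures.ProcessPoolExecutor(max_workers=cpus) as executor:
        futures = [executor.submit(analyze_locus, f) for f in schema_files]
        # Collect results as they complete, as the CLI code above does.
        for future in concurrent.futures.as_completed(futures):
            results.append(future.result())
    return results
```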
diff --git a/taranis/allele_calling.py b/taranis/allele_calling.py index 20ef08c..d9a1e99 100644 --- a/taranis/allele_calling.py +++ b/taranis/allele_calling.py @@ -5,12 +5,14 @@ import taranis.utils import taranis.blast + # import numpy import pandas as pd from pathlib import Path import pdb + log = logging.getLogger(__name__) stderr = rich.console.Console( stderr=True, force_terminal=taranis.utils.rich_force_colors(), ) + class AlleleCalling: def __init__(self, prediction, sample_file, schema, reference_alleles, out_folder): self.prediction = prediction @@ -27,9 +30,25 @@ def __init__(self, prediction, sample_file, schema, reference_alleles, out_folde self.ref_alleles = reference_alleles self.out_folder = out_folder self.s_name = Path(sample_file).stem - self.blast_dir = os.path.join(out_folder,"blastdb") + self.blast_dir = os.path.join(out_folder, "blastdb") self.blast_sample = os.path.join(self.blast_dir, self.s_name) - self.blast_heading = ["qseqid", "sseqid", "pident", "qlen", "length", "mismatch", "gapopen", "evalue", "bitscore", "sstart", "send", "qstart", "qend", "sseq", "qseq"] + self.blast_heading = [ + "qseqid", + "sseqid", + "pident", + "qlen", + "length", + "mismatch", + "gapopen", + "evalue", + "bitscore", + "sstart", + "send", + "qstart", + "qend", + "sseq", + "qseq", + ] def assign_allele_type(self, query_seq, allele_name, sample_contig, schema_gene): """_summary_ Args: query_seq (_type_): _description_ allele_name (_type_): _description_ sample_contig (_type_): _description_ schema_gene (_type_): _description_ - """ + """ s_alleles_blast = taranis.blast.Blast("nucl") ref_allele_blast_dir = os.path.join(self.blast_dir, "ref_alleles") query_path = os.path.join(self.out_folder, "tmp", allele_name) - # Write to file the sequence to find out the loci name that fully match + # Write the sequence to a file to find out the locus name that fully matches f_name = taranis.utils.write_fasta_file(query_path, query_seq, allele_name) query_file = os.path.join(query_path, f_name) _ = s_alleles_blast.create_blastdb(schema_gene, ref_allele_blast_dir) - # Blast with sample sequence to find the allele in the schema + # Blast with sample sequence to find the allele in the schema seq_blast_match = s_alleles_blast.run_blast(query_file, perc_identity=100) pdb.set_trace() if len(seq_blast_match) >= 1: - # allele is named as NIPHEM - + # allele is named as NIPHEM + # Run a blast with this query sequence against the allele database # Create blast db with sample file - pass elif len(seq_blast_match) == 1: @@ -63,14 +81,13 @@ def assign_allele_type(self, query_seq, allele_name, sample_contig, schema_gene) else: pass - - def search_alleles (self, ref_allele): + def search_alleles(self, ref_allele): allele_name = Path(ref_allele).stem - schema_gene = os.path.join(self.schema, allele_name + ".fasta") + schema_gene = os.path.join(self.schema, allele_name + ".fasta") allele_name = Path(ref_allele).stem # run blast with sample as db and reference allele as query sample_blast_match = self.sample_blast.run_blast(ref_allele) - if len(sample_blast_match) > 0 : + if len(sample_blast_match) > 0: pd_lines = pd.DataFrame([item.split("\t") for item in sample_blast_match]) pd_lines.columns = self.blast_heading pd_lines["pident"] = pd_lines["pident"].apply(pd.to_numeric) @@ -84,16 +101,17 @@ def search_alleles (self, ref_allele): # sel_row = np_lines[mask, :] = np_lines[mask, :] # query_seq = sel_row[0,14] sample_contig = sel_max["sseqid"] - abbr = self.assign_allele_type(query_seq, allele_name, sample_contig, schema_gene) + abbr = self.assign_allele_type( + query_seq, allele_name, sample_contig, schema_gene + ) else: # Sample does not have a reference allele to be matched # Keep LNF info # ver el codigo de espe - #lnf_tpr_tag() + # lnf_tpr_tag() pass pdb.set_trace() -
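Both arms of the `if len(seq_blast_match) >= 1` block above still `pass`, and the `>= 1` test makes the `== 1` branch unreachable, so the classification is clearly unfinished. Going by the NIPHEM comment here and the tags used in `allele_calling_old.py`, the rule being built appears to be: several 100%-identity hits mean paralogous exact matches, exactly one means a known allele, none means further analysis. A minimal sketch of that rule, assuming this reading (the helper name and return labels are hypothetical):

```python
def classify_exact_hits(num_exact_hits: int) -> str:
    # Assumed mapping, based on the NIPHEM/EXACT/LNF tags in this codebase.
    if num_exact_hits > 1:
        # Multiple perfect-identity BLAST hits: paralogous exact matches.
        return "NIPHEM"
    if num_exact_hits == 1:
        # Exactly one perfect hit: the sequence is a known schema allele.
        return "EXACT"
    # No perfect hit: needs further analysis (new allele, LNF, ...).
    return "UNRESOLVED"
```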
def analyze_sample(self): # Create blast db with sample file self.sample_blast = taranis.blast.Blast("nucl") @@ -107,4 +125,3 @@ def analyze_sample(self): pdb.set_trace() return - diff --git a/taranis/allele_calling_old.py b/taranis/allele_calling_old.py index 72d3294..e8be72f 100644 --- a/taranis/allele_calling_old.py +++ b/taranis/allele_calling_old.py @@ -28,16 +28,24 @@ import plotly.graph_objects as go -def check_blast (reference_allele, sample_files, db_name, logger) : ## N +def check_blast(reference_allele, sample_files, db_name, logger): ## N for s_file in sample_files: - f_name = os.path.basename(s_file).split('.') + f_name = os.path.basename(s_file).split(".") dir_name = os.path.dirname(s_file) - blast_dir = os.path.join(dir_name, db_name,f_name[0]) - blast_db = os.path.join(blast_dir,f_name[0]) - if not os.path.exists(blast_dir) : - logger.error('Blast db folder for sample %s does not exist', f_name) + blast_dir = os.path.join(dir_name, db_name, f_name[0]) + blast_db = os.path.join(blast_dir, f_name[0]) + if not os.path.exists(blast_dir): + logger.error("Blast db folder for sample %s does not exist", f_name) return False - cline = NcbiblastnCommandline(db=blast_db, evalue=0.001, outfmt=5, max_target_seqs=10, max_hsps=10,num_threads=1, query=reference_allele) + cline = NcbiblastnCommandline( + db=blast_db, + evalue=0.001, + outfmt=5, + max_target_seqs=10, + max_hsps=10, + num_threads=1, + query=reference_allele, + ) out, err = cline() psiblast_xml = StringIO(out) @@ -51,16 +59,18 @@ def check_blast (reference_allele, sample_files, db_name, logger) : ## N alleleMatchid = int((blast_record.query_id.split("_"))[-1]) return True + # · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · # # Parse samples and core genes schema fasta files to dictionary # # · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · # -def parsing_fasta_file_to_dict (fasta_file, logger): + +def parsing_fasta_file_to_dict(fasta_file, logger): fasta_dict = {} fasta_dict_ordered = {} for contig in SeqIO.parse(fasta_file, "fasta"): fasta_dict[str(contig.id)] = str(contig.seq.upper()) - logger.debug('file %s parsed to dictionary', fasta_file) + logger.debug("file %s parsed to dictionary", fasta_file) for key in sorted(list(fasta_dict.keys())): fasta_dict_ordered[key] = fasta_dict[key] @@ -71,9 +81,11 @@ def parsing_fasta_file_to_dict (fasta_file, logger): # Get core genes schema info before allele calling analysis # # · * · * · * · * · * · * · * · * · * · * · * · * · * · * · # -#def prepare_core_gene (core_gene_file_list, store_dir, ref_alleles_dir, logger): -def prepare_core_gene (core_gene_file_list, store_dir, ref_alleles_dir, genus, species, usegenus, logger): +# def prepare_core_gene (core_gene_file_list, store_dir, ref_alleles_dir, logger): +def prepare_core_gene( + core_gene_file_list, store_dir, ref_alleles_dir, genus, species, usegenus, logger +): ## Initialize dict for keeping id-allele, quality, length variability, length statistics and annotation info for each schema core gene alleles_in_locus_dict = {} schema_quality = {} @@ -81,13 +93,11 @@ def prepare_core_gene (core_gene_file_list, store_dir, ref_alleles_dir, genus, s schema_variability = {} schema_statistics = {} - ## Process each schema core gene - blast_dir = os.path.join(store_dir,'blastdb') - logger.info('start preparation of core genes files') + blast_dir = os.path.join(store_dir, "blastdb") + logger.info("start preparation of core genes files") for fasta_file in core_gene_file_list: - - f_name = os.path.basename(fasta_file).split('.') + f_name =
os.path.basename(fasta_file).split(".") # Parse core gene fasta file and keep id-sequence info in dictionary fasta_file_parsed_dict = parsing_fasta_file_to_dict(fasta_file, logger) @@ -96,8 +106,8 @@ def prepare_core_gene (core_gene_file_list, store_dir, ref_alleles_dir, genus, s alleles_in_locus_dict[f_name[0]] = fasta_file_parsed_dict # dump fasta file into pickle file - #with open (file_list[-1],'wb') as f: - # pickle.dump(fasta_file_parsed_dict, f) + # with open (file_list[-1],'wb') as f: + # pickle.dump(fasta_file_parsed_dict, f) # Get core gene alleles quality locus_quality = check_core_gene_quality(fasta_file, logger) @@ -106,63 +116,90 @@ def prepare_core_gene (core_gene_file_list, store_dir, ref_alleles_dir, genus, s schema_quality[f_name[0]] = locus_quality # Get gene and product annotation for core gene using reference allele(s) - ref_allele = os.path.join(ref_alleles_dir, f_name[0] + '.fasta') + ref_allele = os.path.join(ref_alleles_dir, f_name[0] + ".fasta") - gene_annot, product_annot = get_gene_annotation (ref_allele, store_dir, genus, species, usegenus, logger) - #gene_annot, product_annot = get_gene_annotation (ref_allele, store_dir, logger) + gene_annot, product_annot = get_gene_annotation( + ref_allele, store_dir, genus, species, usegenus, logger + ) + # gene_annot, product_annot = get_gene_annotation (ref_allele, store_dir, logger) if f_name[0] not in annotation_core_dict.keys(): annotation_core_dict[f_name[0]] = {} annotation_core_dict[f_name[0]] = [gene_annot, product_annot] # Get core gene alleles length to keep length variability and statistics info alleles_len = [] - for allele in fasta_file_parsed_dict : + for allele in fasta_file_parsed_dict: alleles_len.append(len(fasta_file_parsed_dict[allele])) - #alleles_in_locus = list (SeqIO.parse(fasta_file, "fasta")) ## parse - #for allele in alleles_in_locus : ## parse - #alleles_len.append(len(str(allele.seq))) ## parse + # alleles_in_locus = list (SeqIO.parse(fasta_file, "fasta")) ## parse + # for allele in alleles_in_locus : ## parse + # alleles_len.append(len(str(allele.seq))) ## parse - schema_variability[f_name[0]]=list(set(alleles_len)) + schema_variability[f_name[0]] = list(set(alleles_len)) if len(alleles_len) == 1: stdev = 0 else: stdev = statistics.stdev(alleles_len) - schema_statistics[f_name[0]]=[statistics.mean(alleles_len), stdev, min(alleles_len), max(alleles_len)] - - return alleles_in_locus_dict, annotation_core_dict, schema_variability, schema_statistics, schema_quality + schema_statistics[f_name[0]] = [ + statistics.mean(alleles_len), + stdev, + min(alleles_len), + max(alleles_len), + ] + + return ( + alleles_in_locus_dict, + annotation_core_dict, + schema_variability, + schema_statistics, + schema_quality, + ) # · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · # # Get Prodigal training file from reference genome for samples gene prediction # # · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · # -def prodigal_training(reference_genome_file, prodigal_dir, logger): - f_name = os.path.basename(reference_genome_file).split('.')[0] - prodigal_train_dir = os.path.join(prodigal_dir, 'training') +def prodigal_training(reference_genome_file, prodigal_dir, logger): + f_name = os.path.basename(reference_genome_file).split(".")[0] + prodigal_train_dir = os.path.join(prodigal_dir, "training") - output_prodigal_train_dir = os.path.join(prodigal_train_dir, f_name + '.trn') + output_prodigal_train_dir = os.path.join(prodigal_train_dir, f_name + ".trn") if not 
os.path.exists(prodigal_train_dir): try: os.makedirs(prodigal_train_dir) - logger.debug('Created prodigal directory for training file %s', f_name) + logger.debug("Created prodigal directory for training file %s", f_name) except: - logger.info('Cannot create prodigal directory for training file %s', f_name) - print ('Error when creating the directory %s for training file', prodigal_train_dir) + logger.info("Cannot create prodigal directory for training file %s", f_name) + print( + "Error when creating the directory %s for training file" + % prodigal_train_dir + ) exit(0) - prodigal_command = ['prodigal' , '-i', reference_genome_file, '-t', output_prodigal_train_dir] - prodigal_result = subprocess.run(prodigal_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - - # if prodigal_result.stderr: - # logger.error('cannot create training file for %s', f_name) - # logger.error('prodigal returning error code %s', prodigal_result.stderr) - # return False + prodigal_command = [ + "prodigal", + "-i", + reference_genome_file, + "-t", + output_prodigal_train_dir, + ] + prodigal_result = subprocess.run( + prodigal_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + + # if prodigal_result.stderr: + # logger.error('cannot create training file for %s', f_name) + # logger.error('prodigal returning error code %s', prodigal_result.stderr) + # return False else: - logger.info('Skeeping prodigal training file creation for %s, as it has already been created', f_name) + logger.info( + "Skipping prodigal training file creation for %s, as it has already been created", + f_name, + ) return output_prodigal_train_dir @@ -171,33 +208,59 @@ def prodigal_training(reference_genome_file, prodigal_dir, logger): # Get Prodigal sample gene prediction # # · * · * · * · * · * · * · * · * · * # -def prodigal_prediction(file_name, prodigal_dir, prodigal_train_dir, logger): - f_name = '.'.join(os.path.basename(file_name).split('.')[:-1]) - prodigal_dir_sample = os.path.join(prodigal_dir,f_name) +def prodigal_prediction(file_name, prodigal_dir, prodigal_train_dir, logger): + f_name = ".".join(os.path.basename(file_name).split(".")[:-1]) + prodigal_dir_sample = os.path.join(prodigal_dir, f_name) - output_prodigal_coord = os.path.join(prodigal_dir_sample, f_name + '_coord.gff') ## no - output_prodigal_prot = os.path.join(prodigal_dir_sample, f_name + '_prot.faa') ## no - output_prodigal_dna = os.path.join(prodigal_dir_sample, f_name + '_dna.faa') + output_prodigal_coord = os.path.join( + prodigal_dir_sample, f_name + "_coord.gff" + ) ## no + output_prodigal_prot = os.path.join( + prodigal_dir_sample, f_name + "_prot.faa" + ) ## no + output_prodigal_dna = os.path.join(prodigal_dir_sample, f_name + "_dna.faa") if not os.path.exists(prodigal_dir_sample): try: os.makedirs(prodigal_dir_sample) - logger.debug('Created prodigal directory for Core Gene %s', f_name) + logger.debug("Created prodigal directory for Core Gene %s", f_name) except: - logger.info('Cannot create prodigal directory for Core Gene %s' , f_name) - print ('Error when creating the directory %s for prodigal genes prediction', prodigal_dir_sample) + logger.info("Cannot create prodigal directory for Core Gene %s", f_name) + print( + "Error when creating the directory %s for prodigal genes prediction" + % prodigal_dir_sample + ) exit(0) - prodigal_command = ['prodigal' , '-i', file_name , '-t', prodigal_train_dir, '-f', 'gff', '-o', output_prodigal_coord, '-a', output_prodigal_prot, '-d', output_prodigal_dna] - prodigal_result = subprocess.run(prodigal_command,
stdout=subprocess.PIPE, stderr=subprocess.PIPE) + prodigal_command = [ + "prodigal", + "-i", + file_name, + "-t", + prodigal_train_dir, + "-f", + "gff", + "-o", + output_prodigal_coord, + "-a", + output_prodigal_prot, + "-d", + output_prodigal_dna, + ] + prodigal_result = subprocess.run( + prodigal_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) # if prodigal_result.stderr: - # logger.error('cannot predict genes for %s ', f_name) - # logger.error('prodigal returning error code %s', prodigal_result.stderr) - #return False + # logger.error('cannot predict genes for %s ', f_name) + # logger.error('prodigal returning error code %s', prodigal_result.stderr) + # return False else: - logger.info('Skeeping prodigal genes prediction for %s, as it has already been made', f_name) + logger.info( + "Skipping prodigal genes prediction for %s, as it has already been made", + f_name, + ) return True @@ -206,64 +269,111 @@ def prodigal_prediction(file_name, prodigal_dir, prodigal_train_dir, logger): # Get Prodigal predicted gene sequence equivalent to BLAST result matching bad quality allele or to no Exact Match BLAST result in allele calling analysis # # · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * # -def get_prodigal_sequence(blast_sseq, contig_blast_id, prodigal_directory, sample_name, blast_parameters, logger): +def get_prodigal_sequence( + blast_sseq, + contig_blast_id, + prodigal_directory, + sample_name, + blast_parameters, + logger, +): prodigal_directory_sample = os.path.join(prodigal_directory, sample_name) - genes_file = os.path.join(prodigal_directory_sample, sample_name + '_dna.faa') + genes_file = os.path.join(prodigal_directory_sample, sample_name + "_dna.faa") ## Create directory for storing prodigal genes prediction per contig BLAST databases - blastdb_per_contig_directory = 'blastdb_per_contig' - full_path_blastdb_per_contig = os.path.join(prodigal_directory_sample, blastdb_per_contig_directory) + blastdb_per_contig_directory = "blastdb_per_contig" + full_path_blastdb_per_contig = os.path.join( + prodigal_directory_sample, blastdb_per_contig_directory + ) if not os.path.exists(full_path_blastdb_per_contig): try: os.makedirs(full_path_blastdb_per_contig) - logger.info('Directory %s has been created', full_path_blastdb_per_contig) + logger.info("Directory %s has been created", full_path_blastdb_per_contig) except: - print ('Cannot create the directory ', full_path_blastdb_per_contig) - logger.info('Directory %s cannot be created', full_path_blastdb_per_contig) - exit (0) + print("Cannot create the directory ", full_path_blastdb_per_contig) + logger.info("Directory %s cannot be created", full_path_blastdb_per_contig) + exit(0) ## Create directory for storing prodigal genes prediction sequences per contig - prodigal_genes_per_contig_directory = 'prodigal_genes_per_contig' - full_path_prodigal_genes_per_contig = os.path.join(prodigal_directory_sample, prodigal_genes_per_contig_directory) + prodigal_genes_per_contig_directory = "prodigal_genes_per_contig" + full_path_prodigal_genes_per_contig = os.path.join( + prodigal_directory_sample, prodigal_genes_per_contig_directory + ) if not os.path.exists(full_path_prodigal_genes_per_contig): try: os.makedirs(full_path_prodigal_genes_per_contig) - logger.info('Directory %s has been created', full_path_prodigal_genes_per_contig) + logger.info( + "Directory %s has been created", full_path_prodigal_genes_per_contig + ) except: - print
('Cannot create the directory ', full_path_prodigal_genes_per_contig) - logger.info('Directory %s cannot be created', full_path_prodigal_genes_per_contig) - exit (0) + print("Cannot create the directory ", full_path_prodigal_genes_per_contig) + logger.info( + "Directory %s cannot be created", full_path_prodigal_genes_per_contig + ) + exit(0) ## Parse prodigal genes prediction fasta file predicted_genes = SeqIO.parse(genes_file, "fasta") ## Create fasta file containing Prodigal predicted genes sequences for X contig in sample - contig_genes_path = os.path.join(full_path_prodigal_genes_per_contig, contig_blast_id + '.fasta') - with open (contig_genes_path, 'w') as out_fh: + contig_genes_path = os.path.join( + full_path_prodigal_genes_per_contig, contig_blast_id + ".fasta" + ) + with open(contig_genes_path, "w") as out_fh: for rec in predicted_genes: - contig_prodigal_id = '_'.join((rec.id).split("_")[:-1]) + contig_prodigal_id = "_".join((rec.id).split("_")[:-1]) if contig_prodigal_id == contig_blast_id: - out_fh.write ('>' + str(rec.description) + '\n' + str(rec.seq) + '\n') + out_fh.write(">" + str(rec.description) + "\n" + str(rec.seq) + "\n") ## Create local BLAST database for Prodigal predicted genes sequences for X contig in sample - if not create_blastdb(contig_genes_path, full_path_blastdb_per_contig, 'nucl', logger): - print('Error when creating the blastdb for samples files. Check log file for more information. \n ') + if not create_blastdb( + contig_genes_path, full_path_blastdb_per_contig, "nucl", logger + ): + print( + "Error when creating the blastdb for sample files. Check log file for more information. \n " + ) return False ## Local BLAST Prodigal predicted genes sequences database VS BLAST sequence obtained in sample in allele calling analysis - blast_db_name = os.path.join(full_path_blastdb_per_contig, contig_blast_id, contig_blast_id) - - cline = NcbiblastnCommandline(db=blast_db_name, evalue=0.001, perc_identity = 90, outfmt= blast_parameters, max_target_seqs=10, max_hsps=10, num_threads=1) - out, err = cline(stdin = blast_sseq) + blast_db_name = os.path.join( + full_path_blastdb_per_contig, contig_blast_id, contig_blast_id + ) + + cline = NcbiblastnCommandline( + db=blast_db_name, + evalue=0.001, + perc_identity=90, + outfmt=blast_parameters, + max_target_seqs=10, + max_hsps=10, + num_threads=1, + ) + out, err = cline(stdin=blast_sseq) out_lines = out.splitlines() bigger_bitscore = 0 - if len (out_lines) > 0 : - for line in out_lines : - values = line.split('\t') - if float(values[8]) > bigger_bitscore: - qseqid , sseqid , pident , qlen , s_length , mismatch , r_gapopen , r_evalue , bitscore , sstart , send , qstart , qend ,sseq , qseq = values + if len(out_lines) > 0: + for line in out_lines: + values = line.split("\t") + if float(values[8]) > bigger_bitscore: + ( + qseqid, + sseqid, + pident, + qlen, + s_length, + mismatch, + r_gapopen, + r_evalue, + bitscore, + sstart, + send, + qstart, + qend, + sseq, + qseq, + ) = values bigger_bitscore = float(bitscore) ## Get complete Prodigal sequence matching allele calling BLAST sequence using ID @@ -272,41 +382,51 @@ def get_prodigal_sequence(blast_sseq, contig_blast_id, prodigal_directory, sampl for rec in predicted_genes_in_contig: if rec.id == sseqid: predicted_gene_sequence = str(rec.seq) - start_prodigal = str(rec.description.split( '#')[1]) - end_prodigal = str(rec.description.split('#')[2]) + start_prodigal = str(rec.description.split("#")[1]) + end_prodigal = str(rec.description.split("#")[2]) break ## Sequence not
found by Prodigal when there are no BLAST results matching allele calling BLAST sequence - if len (out_lines) == 0: - predicted_gene_sequence = 'Sequence not found by Prodigal' - start_prodigal = '-' - end_prodigal = '-' + if len(out_lines) == 0: + predicted_gene_sequence = "Sequence not found by Prodigal" + start_prodigal = "-" + end_prodigal = "-" - return predicted_gene_sequence, start_prodigal, end_prodigal ### start_prodigal y end_prodigal para report prodigal + return ( + predicted_gene_sequence, + start_prodigal, + end_prodigal, + ) ### start_prodigal and end_prodigal for the prodigal report # · * · * · * · * · * · * · * · * · * · * · * · * # # Get samples info before allele calling analysis # # · * · * · * · * · * · * · * · * · * · * · * · * # -def prepare_samples(sample_file_list, store_dir, reference_genome_file, logger): +def prepare_samples(sample_file_list, store_dir, reference_genome_file, logger): ## Initialize dictionary for keeping id-contig contigs_in_sample_dict = {} ## Paths for samples blastdb, Prodigal genes prediction and BLAST results - blast_dir = os.path.join(store_dir,'blastdb') - prodigal_dir = os.path.join(store_dir,'prodigal') - blast_results_seq_dir = os.path.join(store_dir,'blast_results', 'blast_results_seq') + blast_dir = os.path.join(store_dir, "blastdb") + prodigal_dir = os.path.join(store_dir, "prodigal") + blast_results_seq_dir = os.path.join( + store_dir, "blast_results", "blast_results_seq" + ) ## Get training file for Prodigal genes prediction - output_prodigal_train_dir = prodigal_training(reference_genome_file, prodigal_dir, logger) + output_prodigal_train_dir = prodigal_training( + reference_genome_file, prodigal_dir, logger + ) if not output_prodigal_train_dir: - print('Error when creating training file for genes prediction. Check log file for more information. \n ') + print( + "Error when creating training file for genes prediction. Check log file for more information.
\n " + ) return False for fasta_file in sample_file_list: - f_name = '.'.join(os.path.basename(fasta_file).split('.')[:-1]) + f_name = ".".join(os.path.basename(fasta_file).split(".")[:-1]) # Get samples id-contig dictionary fasta_file_parsed_dict = parsing_fasta_file_to_dict(fasta_file, logger) @@ -315,8 +435,8 @@ def prepare_samples(sample_file_list, store_dir, reference_genome_file, logger): contigs_in_sample_dict[f_name] = fasta_file_parsed_dict # dump fasta file into pickle file - #with open (file_list[-1],'wb') as f: # generación de diccionarios de contigs para cada muestra - # pickle.dump(fasta_file_parsed_dict, f) + # with open (file_list[-1],'wb') as f: # generating contig dictionaries for each sample + # pickle.dump(fasta_file_parsed_dict, f) # Create directory for storing BLAST results using reference allele(s) blast_results_seq_per_sample_dir = os.path.join(blast_results_seq_dir, f_name) @@ -324,35 +444,51 @@ if not os.path.exists(blast_results_seq_per_sample_dir): try: os.makedirs(blast_results_seq_per_sample_dir) - logger.debug('Created blast results directory for sample %s', f_name) + logger.debug("Created blast results directory for sample %s", f_name) except: - logger.info('Cannot create blast results directory for sample %s', f_name) - print ('Error when creating the directory for blast results', blast_results_seq_per_sample_dir) + logger.info( + "Cannot create blast results directory for sample %s", f_name + ) + print( + "Error when creating the directory for blast results", + blast_results_seq_per_sample_dir, + ) exit(0) # Prodigal genes prediction for each sample - if not prodigal_prediction(fasta_file, prodigal_dir, output_prodigal_train_dir, logger): - print('Error when predicting genes for samples files. Check log file for more information. \n ') + if not prodigal_prediction( + fasta_file, prodigal_dir, output_prodigal_train_dir, logger + ): + print( + "Error when predicting genes for sample files. Check log file for more information. \n " + ) return False # Create local BLAST db for each sample fasta file - if not create_blastdb(fasta_file, blast_dir, 'nucl', logger): - print('Error when creating the blastdb for samples files. Check log file for more information. \n ') + if not create_blastdb(fasta_file, blast_dir, "nucl", logger): + print( + "Error when creating the blastdb for sample files. Check log file for more information.
\n " + ) return False return contigs_in_sample_dict + # · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * # # Get established length thresholds for allele tagging in allele calling analysis # # · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * # -def length_thresholds(core_name, schema_statistics, percent): ### logger +def length_thresholds(core_name, schema_statistics, percent): ### logger locus_mean = int(schema_statistics[core_name][0]) if percent != "SD": - max_length_threshold = math.ceil(locus_mean + ((locus_mean * float(percent)) / 100)) - min_length_threshold = math.floor(locus_mean - ((locus_mean * float(percent)) / 100)) + max_length_threshold = math.ceil( + locus_mean + ((locus_mean * float(percent)) / 100) + ) + min_length_threshold = math.floor( + locus_mean - ((locus_mean * float(percent)) / 100) + ) else: percent = float(schema_statistics[core_name][1]) @@ -366,103 +502,156 @@ # Convert dna sequence to protein sequence # # · * · * · * · * · * · * · * · * · * · * · # -def convert_to_protein (sequence) : +def convert_to_protein(sequence): seq = Seq.Seq(sequence) protein = str(seq.translate()) return protein + # · * · * · * · * · * · * · * · * · * · * · * · * · * # # Get SNPs between BLAST sequence and matching allele # # · * · * · * · * · * · * · * · * · * · * · * · * · * # -def get_snp (sample, query) : - prot_annotation = {'S': 'polar' ,'T': 'polar' ,'Y': 'polar' ,'Q': 'polar' ,'N': 'polar' ,'C': 'polar' ,'S': 'polar' , - 'F': 'nonpolar' ,'L': 'nonpolar','I': 'nonpolar','M': 'nonpolar','P': 'nonpolar','V': 'nonpolar','A': 'nonpolar','W': 'nonpolar','G': 'nonpolar', - 'D' : 'acidic', 'E' :'acidic', - 'H': 'basic' , 'K': 'basic' , 'R' : 'basic', - '-': '-----', '*' : 'Stop codon'} +def get_snp(sample, query): + prot_annotation = { + "S": "polar", + "T": "polar", + "Y": "polar", + "Q": "polar", + "N": "polar", + "C": "polar", + "F": "nonpolar", + "L": "nonpolar", + "I": "nonpolar", + "M": "nonpolar", + "P": "nonpolar", + "V": "nonpolar", + "A": "nonpolar", + "W": "nonpolar", + "G": "nonpolar", + "D": "acidic", + "E": "acidic", + "H": "basic", + "K": "basic", + "R": "basic", + "-": "-----", + "*": "Stop codon", + } snp_list = [] - sample = sample.replace('-','') - #length = max(len(sample), len(query)) + sample = sample.replace("-", "") + # length = max(len(sample), len(query)) length = len(query) # normalize the length of the sample for the iteration - if len(sample) < length : + if len(sample) < length: need_to_add = length - len(sample) - sample = sample + need_to_add * '-' + sample = sample + need_to_add * "-" # convert to Seq class to translate to protein seq_sample = Seq.Seq(sample) seq_query = Seq.Seq(query) for index in range(length): - if seq_query[index] != seq_sample[index] : + if seq_query[index] != seq_sample[index]: triple_index = index - (index % 3) codon_seq = seq_sample[triple_index : triple_index + 3] codon_que = seq_query[triple_index : triple_index + 3] - if not '-' in str(codon_seq) : + if not "-" in str(codon_seq): prot_seq = str(codon_seq.translate()) prot_que = str(codon_que.translate()) else: - prot_seq = '-' - prot_que = str(seq_query[triple_index: ].translate()) + prot_seq = "-" + prot_que = str(seq_query[triple_index:].translate()) + if
prot_annotation[prot_que[0]] == prot_annotation[prot_seq[0]]: + missense_synonym = "Synonymous" + elif prot_seq == "*": + missense_synonym = "Nonsense" else: - missense_synonym = 'Missense' - #snp_list.append([str(index+1),str(seq_sample[index]) + '/' + str(seq_query[index]), str(codon_seq) + '/'+ str(codon_que), - snp_list.append([str(index+1),str(seq_query[index]) + '/' + str(seq_sample[index]), str(codon_que) + '/'+ str(codon_seq), - # when one of the sequence ends but not the other we will translate the remain sequence to proteins - # in that case we will only annotate the first protein. Using [0] as key of the dictionary annotation - prot_que + '/' + prot_seq, missense_synonym, prot_annotation[prot_que[0]] + ' / ' + prot_annotation[prot_seq[0]]]) - if '-' in str(codon_seq) : + missense_synonym = "Missense" + # snp_list.append([str(index+1),str(seq_sample[index]) + '/' + str(seq_query[index]), str(codon_seq) + '/'+ str(codon_que), + snp_list.append( + [ + str(index + 1), + str(seq_query[index]) + "/" + str(seq_sample[index]), + str(codon_que) + "/" + str(codon_seq), + # when one of the sequences ends but not the other we will translate the remaining sequence to protein + # in that case we will only annotate the first protein, using [0] as the key for the annotation dictionary + prot_que + "/" + prot_seq, + missense_synonym, + prot_annotation[prot_que[0]] + " / " + prot_annotation[prot_seq[0]], + ] + ) + if "-" in str(codon_seq): break return snp_list -def nucleotide_to_protein_alignment (sample_seq, query_seq ) : ### Sustituido por get_alignment +def nucleotide_to_protein_alignment( + sample_seq, query_seq +): ### Replaced by get_alignment aligment = [] sample_prot = convert_to_protein(sample_seq) query_prot = convert_to_protein(query_seq) minimun_length = min(len(sample_prot), len(query_prot)) for i in range(minimun_length): - if sample_prot[i] == query_prot[i] : - aligment.append('|') + if sample_prot[i] == query_prot[i]: + aligment.append("|") else: - aligment.append(' ') - protein_alignment = [['sample', sample_prot],['match', ''.join(aligment)], ['schema', query_prot]] + aligment.append(" ") + protein_alignment = [ + ["sample", sample_prot], + ["match", "".join(aligment)], + ["schema", query_prot], + ] return protein_alignment -def get_alignment_for_indels (blast_db_name, qseq) : ### Sustituido por get_alignment - #match_alignment =[] - cline = NcbiblastnCommandline(db=blast_db_name, evalue=0.001, perc_identity = 80, outfmt= 5, max_target_seqs=10, max_hsps=10,num_threads=1) - out, err = cline(stdin = qseq) +def get_alignment_for_indels(blast_db_name, qseq): ### Replaced by get_alignment + # match_alignment =[] + cline = NcbiblastnCommandline( + db=blast_db_name, + evalue=0.001, + perc_identity=80, + outfmt=5, + max_target_seqs=10, + max_hsps=10, + num_threads=1, + ) + out, err = cline(stdin=qseq) psiblast_xml = StringIO(out) blast_records = NCBIXML.parse(psiblast_xml) for blast_record in blast_records: for alignment in blast_record.alignments: for match in alignment.hsps: - match_alignment = [['sample', match.sbjct],['match', match.match], ['schema',match.query]] + match_alignment = [ + ["sample", match.sbjct], + ["match", match.match], + ["schema", match.query], + ] return match_alignment -def get_alignment_for_deletions (sample_seq, query_seq): ### Sustituido por get_alignment +def get_alignment_for_deletions( + sample_seq, query_seq +): ### Replaced by get_alignment index_found = False alignments = pairwise2.align.globalxx(sample_seq, query_seq) - for index in
range(len(alignments)) : - if alignments[index][4] == len(query_seq) : + for index in range(len(alignments)): + if alignments[index][4] == len(query_seq): index_found = True break - if not index_found : + if not index_found: index = 0 - values = format_alignment(*alignments[index]).split('\n') - match_alignment = [['sample', values[0]],['match', values[1]], ['schema',values[2]]] + values = format_alignment(*alignments[index]).split("\n") + match_alignment = [ + ["sample", values[0]], + ["match", values[1]], + ["schema", values[2]], + ] return match_alignment @@ -470,8 +659,10 @@ def get_alignment_for_deletions (sample_seq, query_seq): ### Sustituido por get_ # Get DNA and protein alignment between the final sequence found in the sample and the matching allele # # · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * # -def get_alignment (sample_seq, query_seq, reward, penalty, gapopen, gapextend, seq_type = "dna"): +def get_alignment( + sample_seq, query_seq, reward, penalty, gapopen, gapextend, seq_type="dna" +): ## If sequences alignment type desired is "protein" convert dna sequences to protein if seq_type == "protein": sample_seq = convert_to_protein(sample_seq) @@ -479,9 +670,15 @@ def get_alignment (sample_seq, query_seq, reward, penalty, gapopen, gapextend, s ## Get dna/protein alignment between final sequence found and matching allele # arguments pairwise2.align.globalms: match, mismatch, gap opening, gap extending - alignments = pairwise2.align.localms(sample_seq, query_seq, reward, penalty, -gapopen, -gapextend) - values = format_alignment(*alignments[0]).split('\n') - match_alignment = [['sample', values[0]],['match', values[1]], ['schema',values[2]]] + alignments = pairwise2.align.localms( + sample_seq, query_seq, reward, penalty, -gapopen, -gapextend + ) + values = format_alignment(*alignments[0]).split("\n") + match_alignment = [ + ["sample", values[0]], + ["match", values[1]], + ["schema", values[2]], + ] return match_alignment @@ -490,102 +687,135 @@ def get_alignment (sample_seq, query_seq, reward, penalty, gapopen, gapextend, s # Tag LNF cases and keep LNF info # # · * · * · * · * · * · * · * · * # -def lnf_tpr_tag(core_name, sample_name, alleles_in_locus_dict, samples_matrix_dict, lnf_tpr_dict, schema_statistics, locus_alleles_path, qseqid, pident, s_length, new_sequence_length, perc_identity_ref, coverage, schema_quality, annotation_core_dict, count_dict, logger): +def lnf_tpr_tag( + core_name, + sample_name, + alleles_in_locus_dict, + samples_matrix_dict, + lnf_tpr_dict, + schema_statistics, + locus_alleles_path, + qseqid, + pident, + s_length, + new_sequence_length, + perc_identity_ref, + coverage, + schema_quality, + annotation_core_dict, + count_dict, + logger, +): gene_annot, product_annot = annotation_core_dict[core_name] - if qseqid == '-': - samples_matrix_dict[sample_name].append('LNF') - tag_report = 'LNF' - matching_allele_length = '-' + if qseqid == "-": + samples_matrix_dict[sample_name].append("LNF") + tag_report = "LNF" + matching_allele_length = "-" else: - if new_sequence_length == '-': - samples_matrix_dict[sample_name].append('LNF_' + str(qseqid)) - tag_report = 'LNF' + if new_sequence_length == "-": + samples_matrix_dict[sample_name].append("LNF_" + str(qseqid)) + tag_report = "LNF" else: - samples_matrix_dict[sample_name].append('TPR_' + str(qseqid)) - tag_report = 'TPR' + samples_matrix_dict[sample_name].append("TPR_" + str(qseqid)) + tag_report = "TPR" matching_allele_seq = 
alleles_in_locus_dict[core_name][qseqid] matching_allele_length = len(matching_allele_seq) - #alleles_in_locus = list (SeqIO.parse(locus_alleles_path, "fasta")) ## parse - #for allele in alleles_in_locus : ## parse - #if allele.id == qseqid : ## parse - #break ## parse - #matching_allele_seq = str(allele.seq) ## parse - #matching_allele_length = len(matching_allele_seq) ## parse + # alleles_in_locus = list (SeqIO.parse(locus_alleles_path, "fasta")) ## parse + # for allele in alleles_in_locus : ## parse + # if allele.id == qseqid : ## parse + # break ## parse + # matching_allele_seq = str(allele.seq) ## parse + # matching_allele_length = len(matching_allele_seq) ## parse - if pident == '-': + if pident == "-": # (los dos BLAST sin resultado) - coverage_blast = '-' - coverage_new_sequence = '-' - add_info = 'Locus not found' - logger.info('Locus not found at sample %s, for gene %s', sample_name, core_name) + coverage_blast = "-" + coverage_new_sequence = "-" + add_info = "Locus not found" + logger.info("Locus not found at sample %s, for gene %s", sample_name, core_name) # Get allele quality - allele_quality = '-' + allele_quality = "-" # (recuento tags para plot) - count_dict[sample_name]['not_found'] += 1 - count_dict[sample_name]['total'] += 1 + count_dict[sample_name]["not_found"] += 1 + count_dict[sample_name]["total"] += 1 elif 90 > float(pident): # (BLAST 90 sin resultado y BLAST 70 con resultado) - coverage_blast = '-' - coverage_new_sequence = '-' - add_info = 'BLAST sequence ID under threshold: {}%'.format(perc_identity_ref) - logger.info('BLAST sequence ID %s under threshold at sample %s, for gene %s', pident, sample_name, core_name) + coverage_blast = "-" + coverage_new_sequence = "-" + add_info = "BLAST sequence ID under threshold: {}%".format(perc_identity_ref) + logger.info( + "BLAST sequence ID %s under threshold at sample %s, for gene %s", + pident, + sample_name, + core_name, + ) # Get allele quality - allele_quality = '-' + allele_quality = "-" # (recuento tags para plot) - count_dict[sample_name]['low_id'] += 1 - count_dict[sample_name]['total'] += 1 + count_dict[sample_name]["low_id"] += 1 + count_dict[sample_name]["total"] += 1 - elif 90 <= float(pident) and new_sequence_length == '-': + elif 90 <= float(pident) and new_sequence_length == "-": # (BLAST 90 con resultado, bajo coverage BLAST) locus_mean = int(schema_statistics[core_name][0]) coverage_blast = int(s_length) / locus_mean - #coverage_blast = int(s_length) / matching_allele_length - coverage_new_sequence = '-' + # coverage_blast = int(s_length) / matching_allele_length + coverage_new_sequence = "-" if coverage_blast < 1: - add_info = 'BLAST sequence coverage under threshold: {}%'.format(coverage) + add_info = "BLAST sequence coverage under threshold: {}%".format(coverage) else: - add_info = 'BLAST sequence coverage above threshold: {}%'.format(coverage) - logger.info('BLAST sequence coverage %s under threshold at sample %s, for gene %s', coverage_blast, sample_name, core_name) + add_info = "BLAST sequence coverage above threshold: {}%".format(coverage) + logger.info( + "BLAST sequence coverage %s under threshold at sample %s, for gene %s", + coverage_blast, + sample_name, + core_name, + ) # Get allele quality - allele_quality = '-' + allele_quality = "-" # (recuento tags para plot) - count_dict[sample_name]['low_coverage'] += 1 - count_dict[sample_name]['total'] += 1 + count_dict[sample_name]["low_coverage"] += 1 + count_dict[sample_name]["total"] += 1 - elif 90 <= float(pident) and new_sequence_length != '-': 
+ elif 90 <= float(pident) and new_sequence_length != "-": # (BLAST 90 con resultado, buen coverage BLAST, bajo coverage new_sseq) locus_mean = int(schema_statistics[core_name][0]) coverage_blast = int(s_length) / locus_mean * 100 - #coverage_blast = int(s_length) / matching_allele_length + # coverage_blast = int(s_length) / matching_allele_length coverage_new_sequence = new_sequence_length / matching_allele_length * 100 if coverage_new_sequence < 1: - add_info = 'New sequence coverage under threshold: {}%'.format(coverage) + add_info = "New sequence coverage under threshold: {}%".format(coverage) else: - add_info = 'New sequence coverage above threshold: {}%'.format(coverage) - logger.info('New sequence coverage %s under threshold at sample %s, for gene %s', coverage_new_sequence, sample_name, core_name) + add_info = "New sequence coverage above threshold: {}%".format(coverage) + logger.info( + "New sequence coverage %s under threshold at sample %s, for gene %s", + coverage_new_sequence, + sample_name, + core_name, + ) # Get allele quality allele_quality = schema_quality[core_name][qseqid] # (recuento tags para plot) - count_dict[sample_name]['total'] += 1 + count_dict[sample_name]["total"] += 1 for count_class in count_dict[sample_name]: if count_class in allele_quality: count_dict[sample_name][count_class] += 1 - #if "bad_quality" in allele_quality: - # count_dict[sample_name]['bad_quality'] += 1 + # if "bad_quality" in allele_quality: + # count_dict[sample_name]['bad_quality'] += 1 ## Keeping LNF and TPR report info if not core_name in lnf_tpr_dict: @@ -593,7 +823,22 @@ if not sample_name in lnf_tpr_dict[core_name]: lnf_tpr_dict[core_name][sample_name] = [] - lnf_tpr_dict[core_name][sample_name].append([gene_annot, product_annot, tag_report, qseqid, allele_quality, pident, str(coverage_blast), str(coverage_new_sequence), str(matching_allele_length), str(s_length), str(new_sequence_length), add_info]) ### Meter secuencias alelo, blast y new_sseq (si las hay)? + lnf_tpr_dict[core_name][sample_name].append( + [ + gene_annot, + product_annot, + tag_report, + qseqid, + allele_quality, + pident, + str(coverage_blast), + str(coverage_new_sequence), + str(matching_allele_length), + str(s_length), + str(new_sequence_length), + add_info, + ] + ) ### Include the allele, blast and new_sseq sequences (if any)?
return True @@ -602,20 +847,37 @@ # Tag paralog and exact match cases and keep info # # · * · * · * · * · * · * · * · * · * · * · * · * # -def paralog_exact_tag(sample_name, core_name, tag, schema_quality, matching_genes_dict, samples_matrix_dict, allele_found, tag_dict, prodigal_report, prodigal_directory, blast_parameters, annotation_core_dict, count_dict, logger): - logger.info('Found %s at sample %s for core gene %s ', tag, sample_name, core_name) - - paralog_quality_count = [] # (lista para contabilizar parálogos debido a bad o good quality) +def paralog_exact_tag( + sample_name, + core_name, + tag, + schema_quality, + matching_genes_dict, + samples_matrix_dict, + allele_found, + tag_dict, + prodigal_report, + prodigal_directory, + blast_parameters, + annotation_core_dict, + count_dict, + logger, +): + logger.info("Found %s at sample %s for core gene %s ", tag, sample_name, core_name) + + paralog_quality_count = ( + [] + ) # (list to count paralogs due to bad or good quality) gene_annot, product_annot = annotation_core_dict[core_name] - if not sample_name in tag_dict : + if not sample_name in tag_dict: tag_dict[sample_name] = {} - if not core_name in tag_dict[sample_name] : - tag_dict[sample_name][core_name]= [] + if not core_name in tag_dict[sample_name]: + tag_dict[sample_name][core_name] = [] - if tag == 'EXACT': + if tag == "EXACT": allele = list(allele_found.keys())[0] qseqid = allele_found[allele][0] tag = qseqid @@ -623,8 +885,24 @@ samples_matrix_dict[sample_name].append(tag) for sequence in allele_found: - qseqid, sseqid, pident, qlen, s_length, mismatch, r_gapopen, r_evalue, bitscore, sstart, send, qstart, qend, sseq, qseq = allele_found[sequence] - sseq = sseq.replace('-', '') + ( + qseqid, + sseqid, + pident, + qlen, + s_length, + mismatch, + r_gapopen, + r_evalue, + bitscore, + sstart, + send, + qstart, + qend, + sseq, + qseq, + ) = allele_found[sequence] + sseq = sseq.replace("-", "") # Get allele quality allele_quality = schema_quality[core_name][qseqid] @@ -633,35 +911,87 @@ paralog_quality_count.append(allele_quality) # Get prodigal gene prediction if allele quality is 'bad_quality' - if 'bad_quality' in allele_quality: - complete_predicted_seq, start_prodigal, end_prodigal = get_prodigal_sequence(sseq, sseqid, prodigal_directory, sample_name, blast_parameters, logger) + if "bad_quality" in allele_quality: + ( + complete_predicted_seq, + start_prodigal, + end_prodigal, + ) = get_prodigal_sequence( + sseq, sseqid, prodigal_directory, sample_name, blast_parameters, logger + ) ##### informe prodigal ##### - prodigal_report.append([core_name, sample_name, qseqid, tag, sstart, send, start_prodigal, end_prodigal, sseq, complete_predicted_seq]) + prodigal_report.append( + [ + core_name, + sample_name, + qseqid, + tag, + sstart, + send, + start_prodigal, + end_prodigal, + sseq, + complete_predicted_seq, + ] + ) else: - complete_predicted_seq = '-' + complete_predicted_seq = "-" - if not sseqid in matching_genes_dict[sample_name] : + if not sseqid in matching_genes_dict[sample_name]: matching_genes_dict[sample_name][sseqid] = [] - if sstart > send : - #matching_genes_dict[sample_name][sseqid].append([core_name, sstart, send,'-', tag]) - matching_genes_dict[sample_name][sseqid].append([core_name, qseqid, sstart, send,'-', tag])
+ if sstart > send: + # matching_genes_dict[sample_name][sseqid].append([core_name, sstart, send,'-', tag]) + matching_genes_dict[sample_name][sseqid].append( + [core_name, qseqid, sstart, send, "-", tag] + ) else: - #matching_genes_dict[sample_name][sseqid].append([core_name, sstart, send,'+', tag]) - matching_genes_dict[sample_name][sseqid].append([core_name, qseqid, sstart, send,'+', tag]) + # matching_genes_dict[sample_name][sseqid].append([core_name, sstart, send,'+', tag]) + matching_genes_dict[sample_name][sseqid].append( + [core_name, qseqid, sstart, send, "+", tag] + ) ## Keeping paralog NIPH/NIPHEM report info - if tag == 'NIPH' or tag == 'NIPHEM': - tag_dict[sample_name][core_name].append([gene_annot, product_annot, tag, pident, qseqid, allele_quality, sseqid, bitscore, sstart, send, sseq, complete_predicted_seq]) + if tag == "NIPH" or tag == "NIPHEM": + tag_dict[sample_name][core_name].append( + [ + gene_annot, + product_annot, + tag, + pident, + qseqid, + allele_quality, + sseqid, + bitscore, + sstart, + send, + sseq, + complete_predicted_seq, + ] + ) else: - tag_dict[sample_name][core_name] = [gene_annot, product_annot, qseqid, allele_quality, sseqid, s_length, sstart, send, sseq, complete_predicted_seq] + tag_dict[sample_name][core_name] = [ + gene_annot, + product_annot, + qseqid, + allele_quality, + sseqid, + s_length, + sstart, + send, + sseq, + complete_predicted_seq, + ] # (recuento tags para plot) - count_dict[sample_name]['total'] += 1 + count_dict[sample_name]["total"] += 1 for count_class in count_dict[sample_name]: if count_class in allele_quality: - if "no_start_stop" not in count_class and "no_start_stop" in allele_quality: + if ( + "no_start_stop" not in count_class + and "no_start_stop" in allele_quality + ): if count_class == "bad_quality": count_dict[sample_name][count_class] += 1 else: @@ -673,10 +1003,13 @@ def paralog_exact_tag(sample_name, core_name, tag, schema_quality, matching_gene for paralog_quality in paralog_quality_count: count += 1 if "bad_quality" in paralog_quality: - count_dict[sample_name]['total'] += 1 + count_dict[sample_name]["total"] += 1 for count_class in count_dict[sample_name]: if count_class in paralog_quality: - if "no_start_stop" not in count_class and "no_start_stop" in paralog_quality: + if ( + "no_start_stop" not in count_class + and "no_start_stop" in paralog_quality + ): if count_class == "bad_quality": count_dict[sample_name][count_class] += 1 else: @@ -687,8 +1020,8 @@ def paralog_exact_tag(sample_name, core_name, tag, schema_quality, matching_gene else: if count == len(paralog_quality_count): - count_dict[sample_name]['total'] += 1 - count_dict[sample_name]['good_quality'] += 1 + count_dict[sample_name]["total"] += 1 + count_dict[sample_name]["good_quality"] += 1 return True @@ -697,97 +1030,225 @@ def paralog_exact_tag(sample_name, core_name, tag, schema_quality, matching_gene # Tag INF/ASM/ALM/PLOT cases and keep info # # · * · * · * · * · * · * · * · * · * · * # -def inf_asm_alm_tag(core_name, sample_name, tag, blast_values, allele_quality, new_sseq, matching_allele_length, tag_dict, list_tag, samples_matrix_dict, matching_genes_dict, prodigal_report, start_prodigal, end_prodigal, complete_predicted_seq, annotation_core_dict, count_dict, logger): +def inf_asm_alm_tag( + core_name, + sample_name, + tag, + blast_values, + allele_quality, + new_sseq, + matching_allele_length, + tag_dict, + list_tag, + samples_matrix_dict, + matching_genes_dict, + prodigal_report, + start_prodigal, + end_prodigal, + complete_predicted_seq, + 
annotation_core_dict, + count_dict, + logger, +): gene_annot, product_annot = annotation_core_dict[core_name] - qseqid, sseqid, pident, qlen, s_length, mismatch, r_gapopen, r_evalue, bitscore, sstart, send, qstart, qend, sseq, qseq = blast_values - - sseq = sseq.replace('-', '') + ( + qseqid, + sseqid, + pident, + qlen, + s_length, + mismatch, + r_gapopen, + r_evalue, + bitscore, + sstart, + send, + qstart, + qend, + sseq, + qseq, + ) = blast_values + + sseq = sseq.replace("-", "") s_length = len(sseq) new_sequence_length = len(new_sseq) - logger.info('Found %s at sample %s for core gene %s ', tag, sample_name, core_name) + logger.info("Found %s at sample %s for core gene %s ", tag, sample_name, core_name) - if tag == 'PLOT': - tag_allele = tag + '_' + str(qseqid) + if tag == "PLOT": + tag_allele = tag + "_" + str(qseqid) else: # Adding ASM/ALM/INF allele to the allele_matrix if it is not already include if not core_name in tag_dict: tag_dict[core_name] = [] - if not new_sseq in tag_dict[core_name] : + if not new_sseq in tag_dict[core_name]: tag_dict[core_name].append(new_sseq) # Find the index of ASM/ALM/INF to include it in the sample matrix dict index_tag = tag_dict[core_name].index(new_sseq) - tag_allele = tag + '_' + core_name + '_' + str(qseqid) + '_' + str(index_tag) + tag_allele = tag + "_" + core_name + "_" + str(qseqid) + "_" + str(index_tag) samples_matrix_dict[sample_name].append(tag_allele) # Keeping INF/ASM/ALM/PLOT report info - if not core_name in list_tag : + if not core_name in list_tag: list_tag[core_name] = {} - if not sample_name in list_tag[core_name] : + if not sample_name in list_tag[core_name]: list_tag[core_name][sample_name] = {} - if tag == 'INF': - list_tag[core_name][sample_name][tag_allele] = [gene_annot, product_annot, qseqid, allele_quality, sseqid, bitscore, str(matching_allele_length), str(s_length), str(new_sequence_length), mismatch , r_gapopen, sstart, send, new_sseq, complete_predicted_seq] + if tag == "INF": + list_tag[core_name][sample_name][tag_allele] = [ + gene_annot, + product_annot, + qseqid, + allele_quality, + sseqid, + bitscore, + str(matching_allele_length), + str(s_length), + str(new_sequence_length), + mismatch, + r_gapopen, + sstart, + send, + new_sseq, + complete_predicted_seq, + ] # (recuento tags para plots) - count_dict[sample_name]['total'] += 1 + count_dict[sample_name]["total"] += 1 for count_class in count_dict[sample_name]: if count_class in allele_quality: count_dict[sample_name][count_class] += 1 - #if "bad_quality" in allele_quality: - # count_dict[sample_name]['bad_quality'] += 1 - - elif tag == 'PLOT': - list_tag[core_name][sample_name] = [gene_annot, product_annot, qseqid, allele_quality, sseqid, bitscore, sstart, send, sseq, new_sseq] + # if "bad_quality" in allele_quality: + # count_dict[sample_name]['bad_quality'] += 1 + + elif tag == "PLOT": + list_tag[core_name][sample_name] = [ + gene_annot, + product_annot, + qseqid, + allele_quality, + sseqid, + bitscore, + sstart, + send, + sseq, + new_sseq, + ] # (recuento tags para plots) - count_dict[sample_name]['total'] += 1 + count_dict[sample_name]["total"] += 1 - else : - if tag == 'ASM': - newsseq_vs_blastseq = 'shorter' - elif tag == 'ALM': - newsseq_vs_blastseq = 'longer' + else: + if tag == "ASM": + newsseq_vs_blastseq = "shorter" + elif tag == "ALM": + newsseq_vs_blastseq = "longer" if len(sseq) < matching_allele_length: - add_info = 'Global effect: DELETION. BLAST sequence length shorter than matching allele sequence length / Net result: ' + tag + '. 
Final gene sequence length ' + newsseq_vs_blastseq + ' than matching allele sequence length' + add_info = ( + "Global effect: DELETION. BLAST sequence length shorter than matching allele sequence length / Net result: " + + tag + + ". Final gene sequence length " + + newsseq_vs_blastseq + + " than matching allele sequence length" + ) elif len(sseq) == matching_allele_length: - add_info = 'Global effect: BASE SUBSTITUTION. BLAST sequence length equal to matching allele sequence length / Net result: ' + tag + '. Final gene sequence length ' + newsseq_vs_blastseq + ' than matching allele sequence length' + add_info = ( + "Global effect: BASE SUBSTITUTION. BLAST sequence length equal to matching allele sequence length / Net result: " + + tag + + ". Final gene sequence length " + + newsseq_vs_blastseq + + " than matching allele sequence length" + ) elif len(sseq) > matching_allele_length: - add_info = 'Global effect: INSERTION. BLAST sequence length longer than matching allele sequence length / Net result: ' + tag + '. Final gene sequence length ' + newsseq_vs_blastseq + ' than matching allele sequence length' - - list_tag[core_name][sample_name][tag_allele] = [gene_annot, product_annot, qseqid, allele_quality, sseqid, bitscore, str(matching_allele_length), str(s_length), str(new_sequence_length), mismatch , r_gapopen, sstart, send, new_sseq, add_info, complete_predicted_seq] + add_info = ( + "Global effect: INSERTION. BLAST sequence length longer than matching allele sequence length / Net result: " + + tag + + ". Final gene sequence length " + + newsseq_vs_blastseq + + " than matching allele sequence length" + ) + + list_tag[core_name][sample_name][tag_allele] = [ + gene_annot, + product_annot, + qseqid, + allele_quality, + sseqid, + bitscore, + str(matching_allele_length), + str(s_length), + str(new_sequence_length), + mismatch, + r_gapopen, + sstart, + send, + new_sseq, + add_info, + complete_predicted_seq, + ] # (recuento tags para plots) - if tag == 'ASM': - count_dict[sample_name]['total'] += 1 + if tag == "ASM": + count_dict[sample_name]["total"] += 1 for mut_type in count_dict[sample_name]: if mut_type in add_info.lower(): count_dict[sample_name][mut_type] += 1 - elif tag == 'ALM': - count_dict[sample_name]['total'] += 1 + elif tag == "ALM": + count_dict[sample_name]["total"] += 1 for mut_type in count_dict[sample_name]: if mut_type in add_info.lower(): count_dict[sample_name][mut_type] += 1 - if not sseqid in matching_genes_dict[sample_name] : + if not sseqid in matching_genes_dict[sample_name]: matching_genes_dict[sample_name][sseqid] = [] - if sstart > send : - #matching_genes_dict[sample_name][sseqid].append([core_name, str(int(sstart)-new_sequence_length -1), sstart,'-', tag_allele]) - matching_genes_dict[sample_name][sseqid].append([core_name, qseqid, str(int(sstart)-new_sequence_length -1), sstart,'-', tag_allele]) + if sstart > send: + # matching_genes_dict[sample_name][sseqid].append([core_name, str(int(sstart)-new_sequence_length -1), sstart,'-', tag_allele]) + matching_genes_dict[sample_name][sseqid].append( + [ + core_name, + qseqid, + str(int(sstart) - new_sequence_length - 1), + sstart, + "-", + tag_allele, + ] + ) else: - #matching_genes_dict[sample_name][sseqid].append([core_name, sstart, str(int(sstart)+ new_sequence_length),'+', tag_allele]) - matching_genes_dict[sample_name][sseqid].append([core_name, qseqid, sstart, str(int(sstart)+ new_sequence_length),'+', tag_allele]) + # matching_genes_dict[sample_name][sseqid].append([core_name, sstart, str(int(sstart)+ 
new_sequence_length),'+', tag_allele]) + matching_genes_dict[sample_name][sseqid].append( + [ + core_name, + qseqid, + sstart, + str(int(sstart) + new_sequence_length), + "+", + tag_allele, + ] + ) ##### informe prodigal ##### - prodigal_report.append([core_name, sample_name, qseqid, tag_allele, sstart, send, start_prodigal, end_prodigal, sseq, complete_predicted_seq]) + prodigal_report.append( + [ + core_name, + sample_name, + qseqid, + tag_allele, + sstart, + send, + start_prodigal, + end_prodigal, + sseq, + complete_predicted_seq, + ] + ) return True @@ -796,24 +1257,41 @@ def inf_asm_alm_tag(core_name, sample_name, tag, blast_values, allele_quality, n # Keep best results info after BLAST using results from previous reference allele BLAST as database VS ALL alleles in locus as query in allele calling analysis # # · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · # -def get_blast_results (sample_name, values, contigs_in_sample_dict, allele_found, logger) : - qseqid, sseqid, pident, qlen, s_length, mismatch, r_gapopen, r_evalue, bitscore, sstart, send, qstart, qend, sseq, qseq = values +def get_blast_results( + sample_name, values, contigs_in_sample_dict, allele_found, logger +): + ( + qseqid, + sseqid, + pident, + qlen, + s_length, + mismatch, + r_gapopen, + r_evalue, + bitscore, + sstart, + send, + qstart, + qend, + sseq, + qseq, + ) = values ## Get contig ID and BLAST sequence - sseqid_blast = "_".join(sseqid.split('_')[1:]) - sseq_no_gaps = sseq.replace('-', '') - + sseqid_blast = "_".join(sseqid.split("_")[1:]) + sseq_no_gaps = sseq.replace("-", "") ## Get start and end positions in contig searching for BLAST sequence index in contig sequence # Get contig sequence accession_sequence = contigs_in_sample_dict[sample_name][sseqid_blast] - #for record in sample_contigs: ## parse - #if record.id == sseqid_blast : ## parse - #break ## parse - #accession_sequence = str(record.seq) ## parse + # for record in sample_contigs: ## parse + # if record.id == sseqid_blast : ## parse + # break ## parse + # accession_sequence = str(record.seq) ## parse # Try to get BLAST sequence index in contig. If index -> error because different contig sequence and BLAST sequence # direction, obtain reverse complement BLAST sequence and try again. 
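The lookup described above finds each BLAST hit by searching for the gapless subject sequence inside its contig with str.index; when that raises ValueError, the hit is assumed to lie on the opposite strand, so the reverse complement is tried before giving up. A minimal runnable sketch of that strand-aware lookup, assuming Biopython is available; locate_blast_hit and its return convention are illustrative names, not functions from this module:

from Bio.Seq import Seq

def locate_blast_hit(contig_seq: str, sseq_no_gaps: str):
    # Search the forward strand first; fall back to the reverse complement.
    try:
        start = contig_seq.index(sseq_no_gaps)
        direction = "forward"
    except ValueError:
        rc = str(Seq(sseq_no_gaps).reverse_complement())
        start = contig_seq.index(rc)  # still raises ValueError if truly absent
        direction = "reverse"
    end = start + len(sseq_no_gaps)
    # Return 1-based, inclusive coordinates, comparable to BLAST's sstart/send.
    return start + 1, end, direction

Returning 1-based coordinates keeps the recomputed positions directly comparable with the sstart/send fields taken from the BLAST output.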
@@ -834,26 +1312,55 @@ def get_blast_results (sample_name, values, contigs_in_sample_dict, allele_found sstart_new = str(max(sseq_index_1, sseq_index_2)) send_new = str(min(sseq_index_1, sseq_index_2)) - ## Keep BLAST results info discarding subsets allele_is_subset = False - if len(allele_found) > 0 : - for allele_id in allele_found : - min_index = min(int(allele_found[allele_id][9]), int(allele_found[allele_id][10])) - max_index = max(int(allele_found[allele_id][9]), int(allele_found[allele_id][10])) - if int(sstart_new) in range(min_index, max_index + 1) or int(send_new) in range(min_index, max_index + 1): # if both genome locations overlap - if sseqid_blast == allele_found[allele_id][1]: # if both sequences are in the same contig - logger.info('Found allele %s that starts or ends at the same position as %s ' , qseqid, allele_id) + if len(allele_found) > 0: + for allele_id in allele_found: + min_index = min( + int(allele_found[allele_id][9]), int(allele_found[allele_id][10]) + ) + max_index = max( + int(allele_found[allele_id][9]), int(allele_found[allele_id][10]) + ) + if int(sstart_new) in range(min_index, max_index + 1) or int( + send_new + ) in range( + min_index, max_index + 1 + ): # if both genome locations overlap + if ( + sseqid_blast == allele_found[allele_id][1] + ): # if both sequences are in the same contig + logger.info( + "Found allele %s that starts or ends at the same position as %s ", + qseqid, + allele_id, + ) allele_is_subset = True break - if len(allele_found) == 0 or not allele_is_subset : - contig_id_start = str(sseqid_blast + '_'+ sstart_new) + if len(allele_found) == 0 or not allele_is_subset: + contig_id_start = str(sseqid_blast + "_" + sstart_new) # Skip the allele found in the 100% identity and 100% alignment if not contig_id_start in allele_found: - allele_found[contig_id_start] = [qseqid, sseqid_blast, pident, qlen, s_length, mismatch, r_gapopen, r_evalue, bitscore, sstart_new, send_new, '-', '-', sseq, qseq] + allele_found[contig_id_start] = [ + qseqid, + sseqid_blast, + pident, + qlen, + s_length, + mismatch, + r_gapopen, + r_evalue, + bitscore, + sstart_new, + send_new, + "-", + "-", + sseq, + qseq, + ] return True @@ -862,35 +1369,55 @@ def get_blast_results (sample_name, values, contigs_in_sample_dict, allele_found # Get SNPs and ADN and protein alignment # # · * · * · * · * · * · * · * · * · * · # -def keep_snp_alignment_info(sseq, new_sseq, matching_allele_seq, qseqid, query_direction, core_name, sample_name, reward, penalty, gapopen, gapextend, snp_dict, match_alignment_dict, protein_dict, logger): +def keep_snp_alignment_info( + sseq, + new_sseq, + matching_allele_seq, + qseqid, + query_direction, + core_name, + sample_name, + reward, + penalty, + gapopen, + gapextend, + snp_dict, + match_alignment_dict, + protein_dict, + logger, +): ## Check allele sequence direction - if query_direction == 'reverse': + if query_direction == "reverse": matching_allele_seq = str(Seq.Seq(matching_allele_seq).reverse_complement()) else: matching_allele_seq = str(matching_allele_seq) ## Get the SNP information snp_information = get_snp(sseq, matching_allele_seq) - if len(snp_information) > 0 : - if not core_name in snp_dict : + if len(snp_information) > 0: + if not core_name in snp_dict: snp_dict[core_name] = {} - if not sample_name in snp_dict[core_name] : + if not sample_name in snp_dict[core_name]: snp_dict[core_name][sample_name] = {} - snp_dict[core_name][sample_name][qseqid]= snp_information + snp_dict[core_name][sample_name][qseqid] = snp_information ## Get new 
sequence-allele sequence dna alignment - if not core_name in match_alignment_dict : + if not core_name in match_alignment_dict: match_alignment_dict[core_name] = {} - if not sample_name in match_alignment_dict[core_name] : - match_alignment_dict[core_name][sample_name] = get_alignment (new_sseq, matching_allele_seq, reward, penalty, gapopen, gapextend) + if not sample_name in match_alignment_dict[core_name]: + match_alignment_dict[core_name][sample_name] = get_alignment( + new_sseq, matching_allele_seq, reward, penalty, gapopen, gapextend + ) ## Get new sequence-allele sequence protein alignment - if not core_name in protein_dict : + if not core_name in protein_dict: protein_dict[core_name] = {} - if not sample_name in protein_dict[core_name] : + if not sample_name in protein_dict[core_name]: protein_dict[core_name][sample_name] = [] - protein_dict[core_name][sample_name] = get_alignment (new_sseq, matching_allele_seq, reward, penalty, gapopen, gapextend, "protein") + protein_dict[core_name][sample_name] = get_alignment( + new_sseq, matching_allele_seq, reward, penalty, gapopen, gapextend, "protein" + ) return True @@ -899,52 +1426,81 @@ def keep_snp_alignment_info(sseq, new_sseq, matching_allele_seq, qseqid, query_d # Create allele tag summary for each sample # # · * · * · * · * · * · * · * · * · * · * · # -def create_summary (samples_matrix_dict, logger) : +def create_summary(samples_matrix_dict, logger): summary_dict = {} summary_result_list = [] - summary_heading_list = ['Exact match', 'INF', 'ASM', 'ALM', 'LNF', 'TPR', 'NIPH', 'NIPHEM', 'PLOT', 'ERROR'] - summary_result_list.append('File\t' + '\t'.join(summary_heading_list)) - for key in sorted (samples_matrix_dict) : - - summary_dict[key] = {'Exact match':0, 'INF':0, 'ASM':0, 'ALM':0, 'LNF':0, 'TPR':0,'NIPH':0, 'NIPHEM':0, 'PLOT':0, 'ERROR':0} - for values in samples_matrix_dict[key] : - if 'INF_' in values : - summary_dict[key]['INF'] += 1 - elif 'ASM_' in values : - summary_dict[key]['ASM'] += 1 - elif 'ALM_' in values : - summary_dict[key]['ALM'] += 1 - elif 'LNF' in values : - summary_dict[key]['LNF'] += 1 - elif 'TPR' in values : - summary_dict[key]['TPR'] += 1 - elif 'NIPH' == values : - summary_dict[key]['NIPH'] += 1 - elif 'NIPHEM' == values : - summary_dict[key]['NIPHEM'] += 1 - elif 'PLOT' in values : - summary_dict[key]['PLOT'] += 1 - elif 'ERROR' in values : - summary_dict[key]['ERROR'] += 1 + summary_heading_list = [ + "Exact match", + "INF", + "ASM", + "ALM", + "LNF", + "TPR", + "NIPH", + "NIPHEM", + "PLOT", + "ERROR", + ] + summary_result_list.append("File\t" + "\t".join(summary_heading_list)) + for key in sorted(samples_matrix_dict): + summary_dict[key] = { + "Exact match": 0, + "INF": 0, + "ASM": 0, + "ALM": 0, + "LNF": 0, + "TPR": 0, + "NIPH": 0, + "NIPHEM": 0, + "PLOT": 0, + "ERROR": 0, + } + for values in samples_matrix_dict[key]: + if "INF_" in values: + summary_dict[key]["INF"] += 1 + elif "ASM_" in values: + summary_dict[key]["ASM"] += 1 + elif "ALM_" in values: + summary_dict[key]["ALM"] += 1 + elif "LNF" in values: + summary_dict[key]["LNF"] += 1 + elif "TPR" in values: + summary_dict[key]["TPR"] += 1 + elif "NIPH" == values: + summary_dict[key]["NIPH"] += 1 + elif "NIPHEM" == values: + summary_dict[key]["NIPHEM"] += 1 + elif "PLOT" in values: + summary_dict[key]["PLOT"] += 1 + elif "ERROR" in values: + summary_dict[key]["ERROR"] += 1 else: try: number = int(values) - summary_dict[key]['Exact match'] +=1 + summary_dict[key]["Exact match"] += 1 except: - if '_' in values : + if "_" in values: tmp_value = 
values try: number = int(tmp_value[-1]) - summary_dict[key]['Exact match'] +=1 + summary_dict[key]["Exact match"] += 1 except: - logger.debug('The value %s, was found when collecting summary information for the %s', values, summary_dict[key] ) + logger.debug( + "The value %s, was found when collecting summary information for the %s", + values, + summary_dict[key], + ) else: - logger.debug('The value %s, was found when collecting summary information for the %s', values, summary_dict[key] ) + logger.debug( + "The value %s, was found when collecting summary information for the %s", + values, + summary_dict[key], + ) summary_sample_list = [] - for item in summary_heading_list : + for item in summary_heading_list: summary_sample_list.append(str(summary_dict[key][item])) - summary_result_list.append(key + '\t' +'\t'.join(summary_sample_list)) + summary_result_list.append(key + "\t" + "\t".join(summary_sample_list)) return summary_result_list @@ -952,24 +1508,55 @@ def create_summary (samples_matrix_dict, logger) : # Get gene and product annotation for core gene using Prokka # # · * · * · * · * · * · * · * · * · * · * · * · * · * · * · # -### (tsv para algunos locus? Utils para analyze schema?) -def get_gene_annotation (annotation_file, annotation_dir, genus, species, usegenus, logger) : - name_file = os.path.basename(annotation_file).split('.') - annotation_dir = os.path.join (annotation_dir, 'annotation', name_file[0]) - - if usegenus == 'true': - annotation_result = subprocess.run (['prokka', annotation_file, '--outdir', annotation_dir, - '--genus', genus, '--species', species, '--usegenus', - '--gcode', '11', '--prefix', name_file[0], '--quiet']) - - elif usegenus == 'false': - annotation_result = subprocess.run (['prokka', annotation_file, '--outdir', annotation_dir, - '--genus', genus, '--species', species, - '--gcode', '11', '--prefix', name_file[0], '--quiet']) +### (tsv para algunos locus? Utils para analyze schema?) 
+def get_gene_annotation( + annotation_file, annotation_dir, genus, species, usegenus, logger +): + name_file = os.path.basename(annotation_file).split(".") + annotation_dir = os.path.join(annotation_dir, "annotation", name_file[0]) + + if usegenus == "true": + annotation_result = subprocess.run( + [ + "prokka", + annotation_file, + "--outdir", + annotation_dir, + "--genus", + genus, + "--species", + species, + "--usegenus", + "--gcode", + "11", + "--prefix", + name_file[0], + "--quiet", + ] + ) + + elif usegenus == "false": + annotation_result = subprocess.run( + [ + "prokka", + annotation_file, + "--outdir", + annotation_dir, + "--genus", + genus, + "--species", + species, + "--gcode", + "11", + "--prefix", + name_file[0], + "--quiet", + ] + ) annot_tsv = [] - tsv_path = os.path.join (annotation_dir, name_file[0] + '.tsv') + tsv_path = os.path.join(annotation_dir, name_file[0] + ".tsv") try: with open(tsv_path) as tsvfile: @@ -978,31 +1565,31 @@ def get_gene_annotation (annotation_file, annotation_dir, genus, species, usegen annot_tsv.append(line) if len(annot_tsv) > 1: - gene_index = annot_tsv[0].index("gene") product_index = annot_tsv[0].index("product") try: - if '_' in annot_tsv[1][2]: - gene_annot = annot_tsv[1][gene_index].split('_')[0] + if "_" in annot_tsv[1][2]: + gene_annot = annot_tsv[1][gene_index].split("_")[0] else: gene_annot = annot_tsv[1][gene_index] except: - gene_annot = 'Not found by Prokka' + gene_annot = "Not found by Prokka" try: product_annot = annot_tsv[1][product_index] except: - product_annot = 'Not found by Prokka' + product_annot = "Not found by Prokka" else: - gene_annot = 'Not found by Prokka' - product_annot = 'Not found by Prokka' + gene_annot = "Not found by Prokka" + product_annot = "Not found by Prokka" except: - gene_annot = 'Not found by Prokka' - product_annot = 'Not found by Prokka' + gene_annot = "Not found by Prokka" + product_annot = "Not found by Prokka" return gene_annot, product_annot + """ def get_gene_annotation (annotation_file, annotation_dir, logger) : name_file = os.path.basename(annotation_file).split('.') @@ -1079,7 +1666,7 @@ def get_gene_annotation (annotation_file, annotation_dir, logger) : """ -def analize_annotation_files (in_file, logger) : ## N +def analize_annotation_files(in_file, logger): ## N examiner = GFF.GFFExaminer() file_fh = open(in_file) datos = examiner.available_limits(in_file) @@ -1087,24 +1674,33 @@ def analize_annotation_files (in_file, logger) : ## N return True -def get_inferred_allele_number(core_dict, logger): ## N - #This function will look for the highest locus number and it will return a safe high value +def get_inferred_allele_number(core_dict, logger): ## N + # This function will look for the highest locus number and it will return a safe high value # that will be added to the schema database - logger.debug('running get_inferred_allele_number function') + logger.debug("running get_inferred_allele_number function") int_keys = [] for key in core_dict.keys(): int_keys.append(key) max_value = max(int_keys) digit_length = len(str(max_value)) - return True #str 1 ( #'1'+ '0'*digit_length + 2) + return True # str 1 ( #'1'+ '0'*digit_length + 2) # · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * # # Get ST profile for each samples based on allele calling results # # · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * # -def get_ST_profile(outputdir, profile_csv_path, exact_dict, inf_dict, core_gene_list_files, sample_list_files, logger): - ## logger + +def get_ST_profile( + outputdir, 
+ profile_csv_path, + exact_dict, + inf_dict, + core_gene_list_files, + sample_list_files, + logger, +): + ## logger csv_read = [] ST_profiles_dict = {} @@ -1118,15 +1714,17 @@ def get_ST_profile(outputdir, profile_csv_path, exact_dict, inf_dict, core_gene_ for line in csvreader: csv_read.append(line) - profile_header = csv_read[0][1:len(core_gene_list_files) + 1] + profile_header = csv_read[0][1 : len(core_gene_list_files) + 1] for ST_index in range(1, len(csv_read)): ST_profiles_dict[csv_read[ST_index][0]] = {} for core_index in range(len(profile_header)): - ST_profiles_dict[csv_read[ST_index][0]][profile_header[core_index]] = csv_read[ST_index][core_index + 1] + ST_profiles_dict[csv_read[ST_index][0]][ + profile_header[core_index] + ] = csv_read[ST_index][core_index + 1] for sample_file in sample_list_files: - sample_name = '.'.join(os.path.basename(sample_file).split('.')[:-1]) + sample_name = ".".join(os.path.basename(sample_file).split(".")[:-1]) st_counter = 0 for ST in ST_profiles_dict: @@ -1140,14 +1738,16 @@ def get_ST_profile(outputdir, profile_csv_path, exact_dict, inf_dict, core_gene_ if core_name in exact_dict[sample_name]: allele_in_sample = exact_dict[sample_name][core_name][2] - if not '_' in allele_in_ST: - if '_' in allele_in_sample: - allele_in_sample = allele_in_sample.split('_')[1] + if not "_" in allele_in_ST: + if "_" in allele_in_sample: + allele_in_sample = allele_in_sample.split("_")[1] if st_counter == 0: if sample_name not in analysis_profiles_dict: analysis_profiles_dict[sample_name] = {} - analysis_profiles_dict[sample_name][core_name] = allele_in_sample + analysis_profiles_dict[sample_name][ + core_name + ] = allele_in_sample if allele_in_sample == allele_in_ST: core_counter += 1 @@ -1165,14 +1765,16 @@ def get_ST_profile(outputdir, profile_csv_path, exact_dict, inf_dict, core_gene_ allele_in_sample = inf_dict[sample_name][core_name][2] if sample_name not in analysis_profiles_dict: analysis_profiles_dict[sample_name] = {} - analysis_profiles_dict[sample_name][core_name] = allele_in_sample + analysis_profiles_dict[sample_name][ + core_name + ] = allele_in_sample else: if st_counter == 0: if sample_name not in analysis_profiles_dict: analysis_profiles_dict[sample_name] = {} - if allele_in_ST == 'N' and "allele_in_sample" not in locals(): + if allele_in_ST == "N" and "allele_in_sample" not in locals(): core_counter += 1 st_counter += 1 @@ -1199,10 +1801,12 @@ def get_ST_profile(outputdir, profile_csv_path, exact_dict, inf_dict, core_gene_ if sample_name in analysis_profiles_dict: if len(analysis_profiles_dict[sample_name]) == len(profile_header): new_st_id = str(len(ST_profiles_dict) + 1) - ST_profiles_dict[new_st_id + "_INF"] = analysis_profile_dict[sample_name] + ST_profiles_dict[new_st_id + "_INF"] = analysis_profile_dict[ + sample_name + ] inf_ST[new_st_id] = analysis_profile_dict[sample_name] - samples_profiles_dict[sample_name]=new_st_id + "_INF" + samples_profiles_dict[sample_name] = new_st_id + "_INF" if "New" not in count_st: count_st["New"] = {} @@ -1211,24 +1815,24 @@ def get_ST_profile(outputdir, profile_csv_path, exact_dict, inf_dict, core_gene_ count_st["New"][new_st_id] += 1 else: - samples_profiles_dict[sample_name] = '-' + samples_profiles_dict[sample_name] = "-" if "Unknown" not in count_st: count_st["Unknown"] = 0 count_st["Unknown"] += 1 else: - samples_profiles_dict[sample_name] = '-' + samples_profiles_dict[sample_name] = "-" if "Unknown" not in count_st: count_st["Unknown"] = 0 count_st["Unknown"] += 1 ## Create ST profile results report 
- save_st_profile_results (outputdir, samples_profiles_dict, logger) + save_st_profile_results(outputdir, samples_profiles_dict, logger) ## Obtain interactive piechart - logger.info('Creating interactive ST results piechart') - create_sunburst_plot_st (outputdir, count_st, logger) + logger.info("Creating interactive ST results piechart") + create_sunburst_plot_st(outputdir, count_st, logger) return True, inf_ST @@ -1237,24 +1841,24 @@ def get_ST_profile(outputdir, profile_csv_path, exact_dict, inf_dict, core_gene_ # Create ST results report # # · * · * · * · * · * · * # -def save_st_profile_results (outputdir, samples_profiles_dict, logger): - header_stprofile = ['Sample Name', 'ST'] +def save_st_profile_results(outputdir, samples_profiles_dict, logger): + header_stprofile = ["Sample Name", "ST"] - if samples_profiles_dict != '': + if samples_profiles_dict != "": ## Saving ST profile to file - logger.info('Saving ST profile information to file..') - stprofile_file = os.path.join(outputdir, 'stprofile.tsv') - with open (stprofile_file , 'w') as st_fh : - st_fh.write('\t'.join(header_stprofile)+ '\n') + logger.info("Saving ST profile information to file..") + stprofile_file = os.path.join(outputdir, "stprofile.tsv") + with open(stprofile_file, "w") as st_fh: + st_fh.write("\t".join(header_stprofile) + "\n") for sample in sorted(samples_profiles_dict): - st_fh.write(sample + '\t' + samples_profiles_dict[sample] + '\n') + st_fh.write(sample + "\t" + samples_profiles_dict[sample] + "\n") return True -def create_sunburst_plot_st (outputdir, count_st, logger): - ### logger? +def create_sunburst_plot_st(outputdir, count_st, logger): + ### logger? counts = [] st_ids = ["ST"] st_labels = ["ST"] @@ -1263,7 +1867,6 @@ def create_sunburst_plot_st (outputdir, count_st, logger): total_samples = 0 for st_type in count_st: - if type(count_st[st_type]) == dict: total_st_type_count = sum(count_st[st_type].values()) else: @@ -1285,17 +1888,19 @@ def create_sunburst_plot_st (outputdir, count_st, logger): counts.insert(0, total_samples) - fig = go.Figure(go.Sunburst( - ids = st_ids, - labels = st_labels, - parents = st_parents, - values = counts, - branchvalues = "total", - )) + fig = go.Figure( + go.Sunburst( + ids=st_ids, + labels=st_labels, + parents=st_parents, + values=counts, + branchvalues="total", + ) + ) - fig.update_layout(margin = dict(t=0, l=0, r=0, b=0)) + fig.update_layout(margin=dict(t=0, l=0, r=0, b=0)) - plotsdir = os.path.join(outputdir, 'plots', 'samples_st.html') + plotsdir = os.path.join(outputdir, "plots", "samples_st.html") fig.write_html(plotsdir) @@ -1306,32 +1911,38 @@ def create_sunburst_plot_st (outputdir, count_st, logger): # Update ST profile file adding new ST found # # · * · * · * · * · * · * · * · * · * · * · # -def update_st_profile (updateprofile, profile_csv_path, outputdir, inf_ST, core_gene_list_files, logger): +def update_st_profile( + updateprofile, profile_csv_path, outputdir, inf_ST, core_gene_list_files, logger +): ## Create a copy of ST profile file if updateprofile = 'new' - if updateprofile == 'new': + if updateprofile == "new": no_updated_profile_csv_path = profile_csv_path - profile_csv_path_name = os.path.basename(no_updated_profile_csv_path).split('.')[0] - profile_csv_path = os.path.join(outputdir, profile_csv_path_name + '_updated' + '.csv') + profile_csv_path_name = os.path.basename(no_updated_profile_csv_path).split( + "." 
+ )[0] + profile_csv_path = os.path.join( + outputdir, profile_csv_path_name + "_updated" + ".csv" + ) shutil.copyfile(no_updated_profile_csv_path, profile_csv_path) - logger.info('Copying ST profile file to update profiles') + logger.info("Copying ST profile file to update profiles") ## Update ST profile file - logger.info('Updating ST profile file adding new INF ST') + logger.info("Updating ST profile file adding new INF ST") - with open (profile_csv_path, 'r') as csvfile: + with open(profile_csv_path, "r") as csvfile: csvreader = csv.reader(csvfile, delimiter="\t") for line in csvreader: - profile_header = line[0][1:len(core_gene_list_files) + 1] + profile_header = line[0][1 : len(core_gene_list_files) + 1] break - with open (profile_csv_path, 'a') as profile_fh: + with open(profile_csv_path, "a") as profile_fh: for ST in inf_ST: locus_ST_list = [] locus_ST_list.append(ST) for locus in profile_header: locus_ST_list.append(inf_ST[ST][locus]) - profile_fh.write ('\t'.join(locus_ST_list)+ '\n') + profile_fh.write("\t".join(locus_ST_list) + "\n") return True @@ -1340,312 +1951,760 @@ def update_st_profile (updateprofile, profile_csv_path, outputdir, inf_ST, core_ # Create allele calling results reports # # · * · * · * · * · * · * · * · * · * · # -def save_allele_call_results (outputdir, full_gene_list, samples_matrix_dict, exact_dict, paralog_dict, inf_dict, plot_dict, matching_genes_dict, list_asm, list_alm, lnf_tpr_dict, snp_dict, match_alignment_dict, protein_dict, prodigal_report, shorter_seq_coverage, longer_seq_coverage, equal_seq_coverage, shorter_blast_seq_coverage, longer_blast_seq_coverage, equal_blast_seq_coverage, logger): - header_matching_alleles_contig = ['Sample Name', 'Contig', 'Core Gene', 'Allele', 'Contig Start', 'Contig Stop', 'Direction', 'Codification'] - header_exact = ['Core Gene', 'Sample Name', 'Gene Annotation', 'Product Annotation', 'Allele', 'Allele Quality', 'Contig', 'Query length', 'Contig start', 'Contig end', 'Sequence', 'Predicted Sequence'] - header_paralogs = ['Core Gene','Sample Name', 'Gene Annotation', 'Product Annotation', 'Paralog Tag', 'ID %', 'Allele', 'Allele Quality', 'Contig', 'Bitscore', 'Contig start', 'Contig end', 'Sequence', 'Predicted Sequence'] - header_inferred = ['Core Gene','Sample Name', 'INF tag', 'Gene Annotation', 'Product Annotation', 'Allele', 'Allele Quality', 'Contig', 'Bitscore', 'Query length', 'Contig length', 'New sequence length' , 'Mismatch' , 'gaps', 'Contig start', 'Contig end', 'New sequence', 'Predicted Sequence'] - header_asm = ['Core Gene', 'Sample Name', 'ASM tag', 'Gene Annotation', 'Product Annotation', 'Allele', 'Allele Quality', 'Contig', 'Bitscore', 'Query length', 'Contig length', 'New sequence length' , 'Mismatch' , 'gaps', 'Contig start', 'Contig end', 'New sequence', 'Additional info', 'Predicted Sequence'] - header_alm = ['Core Gene', 'Sample Name', 'ALM tag', 'Gene Annotation', 'Product Annotation', 'Allele', 'Allele Quality', 'Contig', 'Bitscore', 'Query length', 'Contig length', 'New sequence length' , 'Mismatch' , 'gaps', 'Contig start', 'Contig end', 'New sequence', 'Additional info', 'Predicted Sequence'] - header_plot = ['Core Gene', 'Sample Name', 'Gene Annotation', 'Product Annotation', 'Allele', 'Allele Quality', 'Contig','Bitscore', 'Contig start', 'Contig end', 'Sequence', 'Predicted Sequence'] - header_lnf_tpr = ['Core Gene', 'Sample Name', 'Gene Annotation', 'Product Annotation', 'Tag', 'Allele', 'Allele Quality', 'ID %', 'Blast sequence coverage %', 'New sequence coverage %', 'Query 
length', 'Contig length', 'New sequence length', 'Additional info'] - header_snp = ['Core Gene', 'Sample Name', 'Allele', 'Position', 'Mutation Schema/Sample', 'Codon Schema/Sample','Amino acid in Schema/Sample', 'Mutation type','Annotation Schema/Sample'] - header_protein = ['Core Gene','Sample Name', 'Protein in ' , 'Protein sequence'] - header_match_alignment = ['Core Gene','Sample Name','Alignment', 'Sequence'] - header_stprofile = ['Sample Name', 'ST'] +def save_allele_call_results( + outputdir, + full_gene_list, + samples_matrix_dict, + exact_dict, + paralog_dict, + inf_dict, + plot_dict, + matching_genes_dict, + list_asm, + list_alm, + lnf_tpr_dict, + snp_dict, + match_alignment_dict, + protein_dict, + prodigal_report, + shorter_seq_coverage, + longer_seq_coverage, + equal_seq_coverage, + shorter_blast_seq_coverage, + longer_blast_seq_coverage, + equal_blast_seq_coverage, + logger, +): + header_matching_alleles_contig = [ + "Sample Name", + "Contig", + "Core Gene", + "Allele", + "Contig Start", + "Contig Stop", + "Direction", + "Codification", + ] + header_exact = [ + "Core Gene", + "Sample Name", + "Gene Annotation", + "Product Annotation", + "Allele", + "Allele Quality", + "Contig", + "Query length", + "Contig start", + "Contig end", + "Sequence", + "Predicted Sequence", + ] + header_paralogs = [ + "Core Gene", + "Sample Name", + "Gene Annotation", + "Product Annotation", + "Paralog Tag", + "ID %", + "Allele", + "Allele Quality", + "Contig", + "Bitscore", + "Contig start", + "Contig end", + "Sequence", + "Predicted Sequence", + ] + header_inferred = [ + "Core Gene", + "Sample Name", + "INF tag", + "Gene Annotation", + "Product Annotation", + "Allele", + "Allele Quality", + "Contig", + "Bitscore", + "Query length", + "Contig length", + "New sequence length", + "Mismatch", + "gaps", + "Contig start", + "Contig end", + "New sequence", + "Predicted Sequence", + ] + header_asm = [ + "Core Gene", + "Sample Name", + "ASM tag", + "Gene Annotation", + "Product Annotation", + "Allele", + "Allele Quality", + "Contig", + "Bitscore", + "Query length", + "Contig length", + "New sequence length", + "Mismatch", + "gaps", + "Contig start", + "Contig end", + "New sequence", + "Additional info", + "Predicted Sequence", + ] + header_alm = [ + "Core Gene", + "Sample Name", + "ALM tag", + "Gene Annotation", + "Product Annotation", + "Allele", + "Allele Quality", + "Contig", + "Bitscore", + "Query length", + "Contig length", + "New sequence length", + "Mismatch", + "gaps", + "Contig start", + "Contig end", + "New sequence", + "Additional info", + "Predicted Sequence", + ] + header_plot = [ + "Core Gene", + "Sample Name", + "Gene Annotation", + "Product Annotation", + "Allele", + "Allele Quality", + "Contig", + "Bitscore", + "Contig start", + "Contig end", + "Sequence", + "Predicted Sequence", + ] + header_lnf_tpr = [ + "Core Gene", + "Sample Name", + "Gene Annotation", + "Product Annotation", + "Tag", + "Allele", + "Allele Quality", + "ID %", + "Blast sequence coverage %", + "New sequence coverage %", + "Query length", + "Contig length", + "New sequence length", + "Additional info", + ] + header_snp = [ + "Core Gene", + "Sample Name", + "Allele", + "Position", + "Mutation Schema/Sample", + "Codon Schema/Sample", + "Amino acid in Schema/Sample", + "Mutation type", + "Annotation Schema/Sample", + ] + header_protein = ["Core Gene", "Sample Name", "Protein in ", "Protein sequence"] + header_match_alignment = ["Core Gene", "Sample Name", "Alignment", "Sequence"] + header_stprofile = ["Sample Name", "ST"] # 
Añadido header_prodigal_report para report prodigal -# header_prodigal_report = ['Core gene', 'Sample Name', 'Allele', 'Sequence type', 'BLAST start', 'BLAST end', 'Prodigal start', 'Prodigal end', 'BLAST sequence', 'Prodigal sequence'] + # header_prodigal_report = ['Core gene', 'Sample Name', 'Allele', 'Sequence type', 'BLAST start', 'BLAST end', 'Prodigal start', 'Prodigal end', 'BLAST sequence', 'Prodigal sequence'] # Añadido header_newsseq_coverage_report para determinar coverage threshold a imponer -# header_newsseq_coverage_report = ['Core gene', 'Sample Name', 'Query length', 'New sequence length', 'Locus mean', 'Coverage (new sequence/allele)', 'Coverage (new sequence/locus mean)'] + # header_newsseq_coverage_report = ['Core gene', 'Sample Name', 'Query length', 'New sequence length', 'Locus mean', 'Coverage (new sequence/allele)', 'Coverage (new sequence/locus mean)'] # Añadido header_blast_coverage_report para determinar coverage threshold a imponer -# header_blast_coverage_report = ['Core gene', 'Sample Name', 'Query length', 'Blast sequence length', 'Locus mean', 'Coverage (blast sequence/allele)', 'Coverage (blast sequence/locus mean)'] + # header_blast_coverage_report = ['Core gene', 'Sample Name', 'Query length', 'Blast sequence length', 'Locus mean', 'Coverage (blast sequence/allele)', 'Coverage (blast sequence/locus mean)'] ## Saving the result information to file - print ('Saving results to files \n') - result_file = os.path.join ( outputdir, 'result.tsv') - logger.info('Saving result information to file..') - with open (result_file, 'w') as out_fh: - out_fh.write ('Sample Name\t'+'\t'.join( full_gene_list) + '\n') - for key in sorted (samples_matrix_dict): - out_fh.write (key + '\t' + '\t'.join(samples_matrix_dict[key])+ '\n') + print("Saving results to files \n") + result_file = os.path.join(outputdir, "result.tsv") + logger.info("Saving result information to file..") + with open(result_file, "w") as out_fh: + out_fh.write("Sample Name\t" + "\t".join(full_gene_list) + "\n") + for key in sorted(samples_matrix_dict): + out_fh.write(key + "\t" + "\t".join(samples_matrix_dict[key]) + "\n") ## Saving exact matches to file - logger.info('Saving exact matches information to file..') - exact_file = os.path.join(outputdir, 'exact.tsv') - with open (exact_file , 'w') as exact_fh : - exact_fh.write('\t'.join(header_exact)+ '\n') + logger.info("Saving exact matches information to file..") + exact_file = os.path.join(outputdir, "exact.tsv") + with open(exact_file, "w") as exact_fh: + exact_fh.write("\t".join(header_exact) + "\n") for sample in sorted(exact_dict): for core in sorted(exact_dict[sample]): - exact_fh.write(core + '\t' + sample + '\t' + '\t'.join(exact_dict[sample][core]) + '\n') + exact_fh.write( + core + + "\t" + + sample + + "\t" + + "\t".join(exact_dict[sample][core]) + + "\n" + ) ## Saving paralog alleles to file - logger.info('Saving paralog information to file..') - paralog_file = os.path.join(outputdir, 'paralog.tsv') - with open (paralog_file , 'w') as paralog_fh : - paralog_fh.write('\t'.join(header_paralogs) + '\n') - for sample in sorted (paralog_dict) : - for core in sorted (paralog_dict[sample]): - for paralog in paralog_dict[sample][core] : - paralog_fh.write(core + '\t' + sample + '\t' + '\t'.join (paralog) + '\n') + logger.info("Saving paralog information to file..") + paralog_file = os.path.join(outputdir, "paralog.tsv") + with open(paralog_file, "w") as paralog_fh: + paralog_fh.write("\t".join(header_paralogs) + "\n") + for sample in 
sorted(paralog_dict): + for core in sorted(paralog_dict[sample]): + for paralog in paralog_dict[sample][core]: + paralog_fh.write( + core + "\t" + sample + "\t" + "\t".join(paralog) + "\n" + ) ## Saving inferred alleles to file - logger.info('Saving inferred alleles information to file..') - inferred_file = os.path.join(outputdir, 'inferred_alleles.tsv') - with open (inferred_file , 'w') as infer_fh : - infer_fh.write('\t'.join(header_inferred) + '\n') - for core in sorted (inf_dict) : - for sample in sorted (inf_dict[core]) : + logger.info("Saving inferred alleles information to file..") + inferred_file = os.path.join(outputdir, "inferred_alleles.tsv") + with open(inferred_file, "w") as infer_fh: + infer_fh.write("\t".join(header_inferred) + "\n") + for core in sorted(inf_dict): + for sample in sorted(inf_dict[core]): for inferred in inf_dict[core][sample]: # seq_in_inferred_allele = '\t'.join (inf_dict[sample]) - infer_fh.write(core + '\t' + sample + '\t' + inferred + '\t' + '\t'.join(inf_dict[core][sample][inferred]) + '\n') + infer_fh.write( + core + + "\t" + + sample + + "\t" + + inferred + + "\t" + + "\t".join(inf_dict[core][sample][inferred]) + + "\n" + ) ## Saving PLOTs to file - logger.info('Saving PLOT information to file..') - plot_file = os.path.join(outputdir, 'plot.tsv') - with open (plot_file , 'w') as plot_fh : - plot_fh.write('\t'.join(header_plot) + '\n') - for core in sorted (plot_dict) : - for sample in sorted (plot_dict[core]): - plot_fh.write(core + '\t' + sample + '\t' + '\t'.join(plot_dict[core][sample]) + '\n') + logger.info("Saving PLOT information to file..") + plot_file = os.path.join(outputdir, "plot.tsv") + with open(plot_file, "w") as plot_fh: + plot_fh.write("\t".join(header_plot) + "\n") + for core in sorted(plot_dict): + for sample in sorted(plot_dict[core]): + plot_fh.write( + core + + "\t" + + sample + + "\t" + + "\t".join(plot_dict[core][sample]) + + "\n" + ) ## Saving matching contigs to file - logger.info('Saving matching information to file..') - matching_file = os.path.join(outputdir, 'matching_contigs.tsv') - with open (matching_file , 'w') as matching_fh : - matching_fh.write('\t'.join(header_matching_alleles_contig) + '\n') - for samples in sorted (matching_genes_dict) : - for contigs in matching_genes_dict[samples] : - for contig in matching_genes_dict[samples] [contigs]: - matching_alleles = '\t'.join (contig) - matching_fh.write(samples + '\t' + contigs +'\t' + matching_alleles + '\n') + logger.info("Saving matching information to file..") + matching_file = os.path.join(outputdir, "matching_contigs.tsv") + with open(matching_file, "w") as matching_fh: + matching_fh.write("\t".join(header_matching_alleles_contig) + "\n") + for samples in sorted(matching_genes_dict): + for contigs in matching_genes_dict[samples]: + for contig in matching_genes_dict[samples][contigs]: + matching_alleles = "\t".join(contig) + matching_fh.write( + samples + "\t" + contigs + "\t" + matching_alleles + "\n" + ) ## Saving ASMs to file - logger.info('Saving asm information to file..') - asm_file = os.path.join(outputdir, 'asm.tsv') - with open (asm_file , 'w') as asm_fh : - asm_fh.write('\t'.join(header_asm)+ '\n') - for core in list_asm : - for sample in list_asm[core] : + logger.info("Saving asm information to file..") + asm_file = os.path.join(outputdir, "asm.tsv") + with open(asm_file, "w") as asm_fh: + asm_fh.write("\t".join(header_asm) + "\n") + for core in list_asm: + for sample in list_asm[core]: for asm in list_asm[core][sample]: - asm_fh.write(core + '\t' + 
sample + '\t' + asm + '\t' + '\t'.join(list_asm[core][sample][asm]) + '\n') + asm_fh.write( + core + + "\t" + + sample + + "\t" + + asm + + "\t" + + "\t".join(list_asm[core][sample][asm]) + + "\n" + ) ## Saving ALMs to file - logger.info('Saving alm information to file..') - alm_file = os.path.join(outputdir, 'alm.tsv') - with open (alm_file , 'w') as alm_fh : - alm_fh.write('\t'.join(header_alm)+ '\n') - for core in list_alm : - for sample in list_alm[core] : + logger.info("Saving alm information to file..") + alm_file = os.path.join(outputdir, "alm.tsv") + with open(alm_file, "w") as alm_fh: + alm_fh.write("\t".join(header_alm) + "\n") + for core in list_alm: + for sample in list_alm[core]: for alm in list_alm[core][sample]: - alm_fh.write(core + '\t' + sample + '\t' + alm + '\t' + '\t'.join(list_alm[core][sample][alm]) + '\n') + alm_fh.write( + core + + "\t" + + sample + + "\t" + + alm + + "\t" + + "\t".join(list_alm[core][sample][alm]) + + "\n" + ) ## Saving LNFs to file - logger.info('Saving lnf information to file..') - lnf_file = os.path.join(outputdir, 'lnf_tpr.tsv') - with open (lnf_file , 'w') as lnf_fh : - lnf_fh.write('\t'.join(header_lnf_tpr)+ '\n') - for core in lnf_tpr_dict : - for sample in lnf_tpr_dict[core] : - for lnf in lnf_tpr_dict[core][sample] : - lnf_fh.write(core + '\t' + sample + '\t' + '\t'.join(lnf) + '\n') + logger.info("Saving lnf information to file..") + lnf_file = os.path.join(outputdir, "lnf_tpr.tsv") + with open(lnf_file, "w") as lnf_fh: + lnf_fh.write("\t".join(header_lnf_tpr) + "\n") + for core in lnf_tpr_dict: + for sample in lnf_tpr_dict[core]: + for lnf in lnf_tpr_dict[core][sample]: + lnf_fh.write(core + "\t" + sample + "\t" + "\t".join(lnf) + "\n") ## Saving SNPs information to file - logger.info('Saving SNPs information to file..') - snp_file = os.path.join(outputdir, 'snp.tsv') - with open (snp_file , 'w') as snp_fh : - snp_fh.write('\t'.join(header_snp) + '\n') - for core in sorted (snp_dict) : - for sample in sorted (snp_dict[core]): - for allele_id_snp in snp_dict[core][sample] : - for snp in snp_dict[core][sample][allele_id_snp] : - snp_fh.write(core + '\t' + sample + '\t' + allele_id_snp + '\t' + '\t'.join (snp) + '\n') + logger.info("Saving SNPs information to file..") + snp_file = os.path.join(outputdir, "snp.tsv") + with open(snp_file, "w") as snp_fh: + snp_fh.write("\t".join(header_snp) + "\n") + for core in sorted(snp_dict): + for sample in sorted(snp_dict[core]): + for allele_id_snp in snp_dict[core][sample]: + for snp in snp_dict[core][sample][allele_id_snp]: + snp_fh.write( + core + + "\t" + + sample + + "\t" + + allele_id_snp + + "\t" + + "\t".join(snp) + + "\n" + ) ## Saving DNA sequences alignments to file - logger.info('Saving matching alignment information to files..') - alignment_dir = os.path.join(outputdir,'alignments') - if os.path.exists(alignment_dir) : + logger.info("Saving matching alignment information to files..") + alignment_dir = os.path.join(outputdir, "alignments") + if os.path.exists(alignment_dir): shutil.rmtree(alignment_dir) - logger.info('deleting the alignment files from previous execution') + logger.info("deleting the alignment files from previous execution") os.makedirs(alignment_dir) - for core in sorted(match_alignment_dict) : - for sample in sorted (match_alignment_dict[core]) : - match_alignment_file = os.path.join(alignment_dir, str('match_alignment_' + core + '_' + sample + '.txt')) - with open(match_alignment_file, 'w') as match_alignment_fh : - match_alignment_fh.write( 
'\t'.join(header_match_alignment) + '\n') - for match_align in match_alignment_dict[core][sample] : - match_alignment_fh.write(core + '\t'+ sample +'\t'+ '\t'.join(match_align) + '\n') + for core in sorted(match_alignment_dict): + for sample in sorted(match_alignment_dict[core]): + match_alignment_file = os.path.join( + alignment_dir, str("match_alignment_" + core + "_" + sample + ".txt") + ) + with open(match_alignment_file, "w") as match_alignment_fh: + match_alignment_fh.write("\t".join(header_match_alignment) + "\n") + for match_align in match_alignment_dict[core][sample]: + match_alignment_fh.write( + core + "\t" + sample + "\t" + "\t".join(match_align) + "\n" + ) ## Saving protein sequences alignments to file - logger.info('Saving protein information to files..') - protein_dir = os.path.join(outputdir,'proteins') - if os.path.exists(protein_dir) : + logger.info("Saving protein information to files..") + protein_dir = os.path.join(outputdir, "proteins") + if os.path.exists(protein_dir): shutil.rmtree(protein_dir) - logger.info('deleting the proteins files from previous execution') + logger.info("deleting the proteins files from previous execution") os.makedirs(protein_dir) - for core in sorted(protein_dict) : - for sample in sorted (protein_dict[core]) : - protein_file = os.path.join(protein_dir, str('protein_' + core + '_' + sample + '.txt')) - with open(protein_file, 'w') as protein_fh : - protein_fh.write( '\t'.join(header_protein) + '\n') - for protein in protein_dict[core][sample] : - protein_fh.write(core + '\t'+ sample +'\t'+ '\t'.join(protein) + '\n') + for core in sorted(protein_dict): + for sample in sorted(protein_dict[core]): + protein_file = os.path.join( + protein_dir, str("protein_" + core + "_" + sample + ".txt") + ) + with open(protein_file, "w") as protein_fh: + protein_fh.write("\t".join(header_protein) + "\n") + for protein in protein_dict[core][sample]: + protein_fh.write( + core + "\t" + sample + "\t" + "\t".join(protein) + "\n" + ) ## Saving summary information to file - logger.info('Saving summary information to file..') - summary_result = create_summary (samples_matrix_dict, logger) - summary_file = os.path.join( outputdir, 'summary_result.tsv') - with open (summary_file , 'w') as summ_fh: - for line in summary_result : - summ_fh.write(line + '\n') + logger.info("Saving summary information to file..") + summary_result = create_summary(samples_matrix_dict, logger) + summary_file = os.path.join(outputdir, "summary_result.tsv") + with open(summary_file, "w") as summ_fh: + for line in summary_result: + summ_fh.write(line + "\n") ## Modify the result file to remove the PLOT_ string for creating the file to use in the tree diagram -# logger.info('Saving result information for tree diagram') -# tree_diagram_file = os.path.join ( outputdir, 'result_for_tree_diagram.tsv') -# with open (result_file, 'r') as result_fh: -# with open(tree_diagram_file, 'w') as td_fh: -# for line in result_fh: -# tree_line = line.replace('PLOT_','') -# td_fh.write(tree_line) + # logger.info('Saving result information for tree diagram') + # tree_diagram_file = os.path.join ( outputdir, 'result_for_tree_diagram.tsv') + # with open (result_file, 'r') as result_fh: + # with open(tree_diagram_file, 'w') as td_fh: + # for line in result_fh: + # tree_line = line.replace('PLOT_','') + # td_fh.write(tree_line) ########################################################################################### # Guardando report de prodigal. 
Temporal -# prodigal_report_file = os.path.join (outputdir, 'prodigal_report.tsv') + # prodigal_report_file = os.path.join (outputdir, 'prodigal_report.tsv') # saving prodigal predictions to file -# with open (prodigal_report_file, 'w') as out_fh: -# out_fh.write ('\t'.join(header_prodigal_report)+ '\n') -# for prodigal_result in prodigal_report: -# out_fh.write ('\t'.join(prodigal_result)+ '\n') + # with open (prodigal_report_file, 'w') as out_fh: + # out_fh.write ('\t'.join(header_prodigal_report)+ '\n') + # for prodigal_result in prodigal_report: + # out_fh.write ('\t'.join(prodigal_result)+ '\n') # Guardando coverage de new_sseq para estimar el threshold a establecer. Temporal -# newsseq_coverage_file = os.path.join (outputdir, 'newsseq_coverage_report.tsv') + # newsseq_coverage_file = os.path.join (outputdir, 'newsseq_coverage_report.tsv') # saving the coverage information to file -# with open (newsseq_coverage_file, 'w') as out_fh: -# out_fh.write ('\t' + '\t'.join(header_newsseq_coverage_report)+ '\n') -# for coverage in shorter_seq_coverage: -# out_fh.write ('Shorter new sequence' + '\t' + '\t'.join(coverage)+ '\n') -# for coverage in longer_seq_coverage: -# out_fh.write ('Longer new sequence' + '\t' + '\t'.join(coverage)+ '\n') -# for coverage in equal_seq_coverage: -# out_fh.write ('Same length new sequence' + '\t' + '\t'.join(coverage)+ '\n') + # with open (newsseq_coverage_file, 'w') as out_fh: + # out_fh.write ('\t' + '\t'.join(header_newsseq_coverage_report)+ '\n') + # for coverage in shorter_seq_coverage: + # out_fh.write ('Shorter new sequence' + '\t' + '\t'.join(coverage)+ '\n') + # for coverage in longer_seq_coverage: + # out_fh.write ('Longer new sequence' + '\t' + '\t'.join(coverage)+ '\n') + # for coverage in equal_seq_coverage: + # out_fh.write ('Same length new sequence' + '\t' + '\t'.join(coverage)+ '\n') # Guardando coverage de la sseq obtenida tras blast para estimar el threshold a establecer. 
Temporal -# blast_coverage_file = os.path.join (outputdir, 'blast_coverage_report.tsv') + # blast_coverage_file = os.path.join (outputdir, 'blast_coverage_report.tsv') # saving the result information to file -# with open (blast_coverage_file, 'w') as out_fh: -# out_fh.write ('\t' + '\t'.join(header_blast_coverage_report)+ '\n') -# for coverage in shorter_blast_seq_coverage: -# out_fh.write ('Shorter blast sequence' + '\t' + '\t'.join(coverage)+ '\n') -# for coverage in longer_blast_seq_coverage: -# out_fh.write ('Longer blast sequence' + '\t' + '\t'.join(coverage)+ '\n') -# for coverage in equal_blast_seq_coverage: -# out_fh.write ('Same length blast sequence' + '\t' + '\t'.join(coverage)+ '\n') + # with open (blast_coverage_file, 'w') as out_fh: + # out_fh.write ('\t' + '\t'.join(header_blast_coverage_report)+ '\n') + # for coverage in shorter_blast_seq_coverage: + # out_fh.write ('Shorter blast sequence' + '\t' + '\t'.join(coverage)+ '\n') + # for coverage in longer_blast_seq_coverage: + # out_fh.write ('Longer blast sequence' + '\t' + '\t'.join(coverage)+ '\n') + # for coverage in equal_blast_seq_coverage: + # out_fh.write ('Same length blast sequence' + '\t' + '\t'.join(coverage)+ '\n') ########################################################################################### return True - -def save_allele_calling_plots (outputdir, sample_list_files, count_exact, count_inf, count_asm, count_alm, count_lnf, count_tpr, count_plot, count_niph, count_niphem, count_error, logger): - +def save_allele_calling_plots( + outputdir, + sample_list_files, + count_exact, + count_inf, + count_asm, + count_alm, + count_lnf, + count_tpr, + count_plot, + count_niph, + count_niphem, + count_error, + logger, +): ## Create result plots directory - plots_dir = os.path.join(outputdir,'plots') + plots_dir = os.path.join(outputdir, "plots") try: os.makedirs(plots_dir) except: - logger.info('Deleting the results plots directory for a previous execution without cleaning up') - shutil.rmtree(os.path.join(outputdir, 'plots')) + logger.info( + "Deleting the results plots directory for a previous execution without cleaning up" + ) + shutil.rmtree(os.path.join(outputdir, "plots")) try: os.makedirs(plots_dir) - logger.info ('Results plots folder %s has been created again', plots_dir) + logger.info("Results plots folder %s has been created again", plots_dir) except: - logger.info('Unable to create again the results plots directory %s', plots_dir) - print('Cannot create Results plots directory on ', plots_dir) + logger.info( + "Unable to create again the results plots directory %s", plots_dir + ) + print("Cannot create Results plots directory on ", plots_dir) exit(0) for sample_file in sample_list_files: - sample_name = '.'.join(os.path.basename(sample_file).split('.')[:-1]) + sample_name = ".".join(os.path.basename(sample_file).split(".")[:-1]) ## Obtain interactive piechart - logger.info('Creating interactive results piecharts') - create_sunburst_allele_call (outputdir, sample_name, count_exact[sample_name], count_inf[sample_name], count_asm[sample_name], count_alm[sample_name], count_lnf[sample_name], count_tpr[sample_name], count_plot[sample_name], count_niph[sample_name], count_niphem[sample_name], count_error[sample_name]) + logger.info("Creating interactive results piecharts") + create_sunburst_allele_call( + outputdir, + sample_name, + count_exact[sample_name], + count_inf[sample_name], + count_asm[sample_name], + count_alm[sample_name], + count_lnf[sample_name], + count_tpr[sample_name], + 
count_plot[sample_name], + count_niph[sample_name], + count_niphem[sample_name], + count_error[sample_name], + ) return True - -def create_sunburst_allele_call (outputdir, sample_name, count_exact, count_inf, count_asm, count_alm, count_lnf, count_tpr, count_plot, count_niph, count_niphem, count_error): - ### logger - - total_locus = count_exact['total'] + count_inf['total'] + count_asm['total'] + count_alm['total'] + count_lnf['total'] + count_tpr['total'] + count_plot['total'] \ - + count_niph['total'] + count_niphem['total'] + count_error['total'] - - tag_counts = [total_locus, count_exact['total'], count_exact['good_quality'], count_exact['bad_quality'], count_exact['no_start'], count_exact['no_start_stop'], - count_exact['no_stop'], count_exact['multiple_stop'], count_inf['total'], count_inf['good_quality'], count_inf['bad_quality'], count_inf['no_start'], - count_inf['no_start_stop'], count_inf['no_stop'], count_inf['multiple_stop'], count_asm['total'], count_asm['insertion'], count_asm['deletion'], - count_asm['substitution'], count_alm['total'], count_alm['insertion'], count_alm['deletion'], count_alm['substitution'], count_plot['total'], - count_niph['total'], count_niph['good_quality'], count_niph['bad_quality'], count_niph['no_start'], count_niph['no_start_stop'], count_niph['no_stop'], - count_niph['multiple_stop'], count_niphem['total'], count_niphem['good_quality'], count_niphem['bad_quality'], count_niphem['no_start'], - count_niphem['no_start_stop'], count_niphem['no_stop'], count_niphem['multiple_stop'], count_lnf['total'], count_lnf['not_found'], count_lnf['low_id'], - count_lnf['low_coverage'], count_tpr['total'], count_tpr['good_quality'], count_tpr['bad_quality'], count_tpr['no_start'], count_tpr['no_start_stop'], - count_tpr['no_stop'], count_tpr['multiple_stop'], count_error['total'], count_error['good_quality'], count_error['bad_quality'], count_error['no_start'], - count_error['no_start_stop'], count_error['no_stop'], count_error['multiple_stop']] - - fig=go.Figure(go.Sunburst( - ids=[ - sample_name, "Exact Match", "Good Quality - Exact Match", "Bad Quality - Exact Match", - "No start - Bad Quality - Exact Match", "No start-stop - Bad Quality - Exact Match", - "No stop - Bad Quality - Exact Match", "Multiple stop - Bad Quality - Exact Match", - "INF", "Good Quality - INF", "Bad Quality - INF", "No start - Bad Quality - INF", - "No start-stop - Bad Quality - INF", "No stop - Bad Quality - INF", "Multiple stop - Bad Quality - INF", - "ASM", "Insertion - ASM", "Deletion - ASM", "Substitution - ASM", "ALM", "Insertion - ALM", - "Deletion - ALM", "Substitution - ALM", "PLOT", "NIPH", "Good Quality - NIPH", - "Bad Quality - NIPH", "No start - Bad Quality - NIPH", "No start-stop - Bad Quality - NIPH", - "No stop - Bad Quality - NIPH", "Multiple stop - Bad Quality - NIPH", "NIPHEM", - "Good Quality - NIPHEM", "Bad Quality - NIPHEM", "No start - Bad Quality - NIPHEM", - "No start-stop - Bad Quality - NIPHEM", "No stop - Bad Quality - NIPHEM", - "Multiple stop - Bad Quality - NIPHEM", "LNF", "Not found", - "Low ID", "Low coverage", "TPR", "Good Quality - TPR", "Bad Quality - TPR", - "No start - Bad Quality - TPR", "No start-stop - Bad Quality - TPR", "No stop - Bad Quality - TPR", - "Multiple stop - Bad Quality - TPR", "Error", "Good Quality - Error", "Bad Quality - Error", - "No start - Bad Quality - Error", "No start-stop - Bad Quality - Error", - "No stop - Bad Quality - Error", "Multiple stop - Bad Quality - Error" - ], - labels= [ - sample_name, "Exact
Match", "Good
Quality", "Bad
Quality", - "No
start", "No
start-stop", "No
stop", "Multiple
stop", - "INF", "Good
Quality", "Bad
Quality", "No
start", - "No
start-stop", "No
stop", "Multiple
stop", "ASM", "Insertion", - "Deletion", "Substitution", "ALM", "Insertion", "Deletion", - "Substitution", "PLOT", "NIPH", "Good
Quality", "Bad
Quality", - "No
start", "No
start-stop", "No
stop", "Multiple
stop", - "NIPHEM", "Good
Quality", "Bad
Quality", "No
start", - "No
start-stop", "No
stop", "Multiple
stop", "LNF", "Not
found", - "Low
ID", "Low
coverage", "TPR", "Good
Quality", "Bad
Quality", - "No
start", "No
start-stop", "No
stop", "Multiple
stop", - "Error", "Good
Quality", "Bad
Quality","No
start", - "No
start-stop", "No
stop", "Multiple
stop" - ], - parents=[ - "", sample_name, "Exact Match", "Exact Match", "Bad Quality - Exact Match", - "Bad Quality - Exact Match", "Bad Quality - Exact Match", "Bad Quality - Exact Match", - sample_name, "INF", "INF", "Bad Quality - INF", "Bad Quality - INF", "Bad Quality - INF", - "Bad Quality - INF", sample_name, "ASM", "ASM", "ASM", sample_name, "ALM", "ALM", "ALM", sample_name, - sample_name, "NIPH", "NIPH", "Bad Quality - NIPH", "Bad Quality - NIPH", "Bad Quality - NIPH", - "Bad Quality - NIPH", sample_name, "NIPHEM", "NIPHEM", "Bad Quality - NIPHEM", - "Bad Quality - NIPHEM", "Bad Quality - NIPHEM", "Bad Quality - NIPHEM", sample_name, "LNF", - "LNF", "LNF", sample_name, "TPR", "TPR", "Bad Quality - TPR", "Bad Quality - TPR", - "Bad Quality - TPR", "Bad Quality - TPR", sample_name, "Error", "Error", "Bad Quality - Error", - "Bad Quality - Error", "Bad Quality - Error", "Bad Quality - Error" - ], - values=tag_counts, - branchvalues="total", - )) - - fig.update_layout(margin = dict(t=0, l=0, r=0, b=0)) - - plotsdir = os.path.join(outputdir, 'plots', sample_name + '.html') +def create_sunburst_allele_call( + outputdir, + sample_name, + count_exact, + count_inf, + count_asm, + count_alm, + count_lnf, + count_tpr, + count_plot, + count_niph, + count_niphem, + count_error, +): + ### logger + + total_locus = ( + count_exact["total"] + + count_inf["total"] + + count_asm["total"] + + count_alm["total"] + + count_lnf["total"] + + count_tpr["total"] + + count_plot["total"] + + count_niph["total"] + + count_niphem["total"] + + count_error["total"] + ) + + tag_counts = [ + total_locus, + count_exact["total"], + count_exact["good_quality"], + count_exact["bad_quality"], + count_exact["no_start"], + count_exact["no_start_stop"], + count_exact["no_stop"], + count_exact["multiple_stop"], + count_inf["total"], + count_inf["good_quality"], + count_inf["bad_quality"], + count_inf["no_start"], + count_inf["no_start_stop"], + count_inf["no_stop"], + count_inf["multiple_stop"], + count_asm["total"], + count_asm["insertion"], + count_asm["deletion"], + count_asm["substitution"], + count_alm["total"], + count_alm["insertion"], + count_alm["deletion"], + count_alm["substitution"], + count_plot["total"], + count_niph["total"], + count_niph["good_quality"], + count_niph["bad_quality"], + count_niph["no_start"], + count_niph["no_start_stop"], + count_niph["no_stop"], + count_niph["multiple_stop"], + count_niphem["total"], + count_niphem["good_quality"], + count_niphem["bad_quality"], + count_niphem["no_start"], + count_niphem["no_start_stop"], + count_niphem["no_stop"], + count_niphem["multiple_stop"], + count_lnf["total"], + count_lnf["not_found"], + count_lnf["low_id"], + count_lnf["low_coverage"], + count_tpr["total"], + count_tpr["good_quality"], + count_tpr["bad_quality"], + count_tpr["no_start"], + count_tpr["no_start_stop"], + count_tpr["no_stop"], + count_tpr["multiple_stop"], + count_error["total"], + count_error["good_quality"], + count_error["bad_quality"], + count_error["no_start"], + count_error["no_start_stop"], + count_error["no_stop"], + count_error["multiple_stop"], + ] + + fig = go.Figure( + go.Sunburst( + ids=[ + sample_name, + "Exact Match", + "Good Quality - Exact Match", + "Bad Quality - Exact Match", + "No start - Bad Quality - Exact Match", + "No start-stop - Bad Quality - Exact Match", + "No stop - Bad Quality - Exact Match", + "Multiple stop - Bad Quality - Exact Match", + "INF", + "Good Quality - INF", + "Bad Quality - INF", + "No start - Bad Quality - INF", + "No start-stop - Bad 
Quality - INF", + "No stop - Bad Quality - INF", + "Multiple stop - Bad Quality - INF", + "ASM", + "Insertion - ASM", + "Deletion - ASM", + "Substitution - ASM", + "ALM", + "Insertion - ALM", + "Deletion - ALM", + "Substitution - ALM", + "PLOT", + "NIPH", + "Good Quality - NIPH", + "Bad Quality - NIPH", + "No start - Bad Quality - NIPH", + "No start-stop - Bad Quality - NIPH", + "No stop - Bad Quality - NIPH", + "Multiple stop - Bad Quality - NIPH", + "NIPHEM", + "Good Quality - NIPHEM", + "Bad Quality - NIPHEM", + "No start - Bad Quality - NIPHEM", + "No start-stop - Bad Quality - NIPHEM", + "No stop - Bad Quality - NIPHEM", + "Multiple stop - Bad Quality - NIPHEM", + "LNF", + "Not found", + "Low ID", + "Low coverage", + "TPR", + "Good Quality - TPR", + "Bad Quality - TPR", + "No start - Bad Quality - TPR", + "No start-stop - Bad Quality - TPR", + "No stop - Bad Quality - TPR", + "Multiple stop - Bad Quality - TPR", + "Error", + "Good Quality - Error", + "Bad Quality - Error", + "No start - Bad Quality - Error", + "No start-stop - Bad Quality - Error", + "No stop - Bad Quality - Error", + "Multiple stop - Bad Quality - Error", + ], + labels=[ + sample_name, + "Exact
Match", + "Good
Quality", + "Bad
Quality", + "No
start", + "No
start-stop", + "No
stop", + "Multiple
stop", + "INF", + "Good
Quality", + "Bad
Quality", + "No
start", + "No
start-stop", + "No
stop", + "Multiple
stop", + "ASM", + "Insertion", + "Deletion", + "Substitution", + "ALM", + "Insertion", + "Deletion", + "Substitution", + "PLOT", + "NIPH", + "Good
Quality", + "Bad
Quality", + "No
start", + "No
start-stop", + "No
stop", + "Multiple
stop", + "NIPHEM", + "Good
Quality", + "Bad
Quality", + "No
start", + "No
start-stop", + "No
stop", + "Multiple
stop", + "LNF", + "Not
found", + "Low
ID", + "Low
coverage", + "TPR", + "Good
Quality", + "Bad
Quality", + "No
start", + "No
start-stop", + "No
stop", + "Multiple
stop", + "Error", + "Good
Quality", + "Bad
Quality", + "No
start", + "No
start-stop", + "No
stop", + "Multiple
stop", + ], + parents=[ + "", + sample_name, + "Exact Match", + "Exact Match", + "Bad Quality - Exact Match", + "Bad Quality - Exact Match", + "Bad Quality - Exact Match", + "Bad Quality - Exact Match", + sample_name, + "INF", + "INF", + "Bad Quality - INF", + "Bad Quality - INF", + "Bad Quality - INF", + "Bad Quality - INF", + sample_name, + "ASM", + "ASM", + "ASM", + sample_name, + "ALM", + "ALM", + "ALM", + sample_name, + sample_name, + "NIPH", + "NIPH", + "Bad Quality - NIPH", + "Bad Quality - NIPH", + "Bad Quality - NIPH", + "Bad Quality - NIPH", + sample_name, + "NIPHEM", + "NIPHEM", + "Bad Quality - NIPHEM", + "Bad Quality - NIPHEM", + "Bad Quality - NIPHEM", + "Bad Quality - NIPHEM", + sample_name, + "LNF", + "LNF", + "LNF", + sample_name, + "TPR", + "TPR", + "Bad Quality - TPR", + "Bad Quality - TPR", + "Bad Quality - TPR", + "Bad Quality - TPR", + sample_name, + "Error", + "Error", + "Bad Quality - Error", + "Bad Quality - Error", + "Bad Quality - Error", + "Bad Quality - Error", + ], + values=tag_counts, + branchvalues="total", + ) + ) + + fig.update_layout(margin=dict(t=0, l=0, r=0, b=0)) + + plotsdir = os.path.join(outputdir, "plots", sample_name + ".html") fig.write_html(plotsdir) @@ -1656,11 +2715,19 @@ def create_sunburst_allele_call (outputdir, sample_name, count_exact, count_inf, # Update core genes schema adding new inferred alleles found for each locus in allele calling analysis # # · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * # -def update_schema (updateschema, schemadir, outputdir, core_gene_list_files, inferred_alleles_dict, alleles_in_locus_dict, logger): +def update_schema( + updateschema, + schemadir, + outputdir, + core_gene_list_files, + inferred_alleles_dict, + alleles_in_locus_dict, + logger, +): if len(inferred_alleles_dict) > 0: ## Create a copy of core genes schema if updateschema = 'new' / 'New' - if updateschema == 'new': + if updateschema == "new": no_updated_schemadir = schemadir ##schemadir_name = os.path.dirname(no_updated_schemadir) ---> se puede usar si guardo finalmente el nuevo esquema en el mismo directorio que el antiguo esquema, pero para ello debo verificar si termina o no el path en / para eliminarlo o no del path antes de hacer el dirname schemadir_name = no_updated_schemadir.split("/") @@ -1669,52 +2736,82 @@ def update_schema (updateschema, schemadir, outputdir, core_gene_list_files, inf else: schemadir_name = schemadir_name[-1] - schemadir = os.path.join(outputdir, schemadir_name + '_updated') + schemadir = os.path.join(outputdir, schemadir_name + "_updated") try: shutil.copytree(no_updated_schemadir, schemadir) - logger.info ('Schema copy %s has been created to update schema', schemadir) + logger.info( + "Schema copy %s has been created to update schema", schemadir + ) except: - logger.info('Deleting preexisting directory') + logger.info("Deleting preexisting directory") shutil.rmtree(schemadir) try: shutil.copytree(no_updated_schemadir, schemadir) - logger.info ('Schema copy %s has been created to update schema', schemadir) + logger.info( + "Schema copy %s has been created to update schema", schemadir + ) except: - logger.info('Unable to create schema copy %s', schemadir) - print('Cannot create schema copy on ', schemadir) + logger.info("Unable to create schema copy %s", schemadir) + print("Cannot create schema copy on ", schemadir) exit(0) ## Get INF alleles for each core gene and update each locus fasta file for core_file in core_gene_list_files: - core_name = 
 
+
 """
 def update_schema (updateschema, schemadir, storedir, core_gene_list_files, inferred_alleles_dict, alleles_in_locus_dict, logger):
@@ -1751,32 +2848,67 @@ def update_schema (updateschema, schemadir, storedir, core_gene_list_files, infe
 
 # Allele calling analysis to find each core gene in schema and its variants in samples #
 # · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * · * #
-def allele_call_nucleotides (core_gene_list_files, sample_list_files, alleles_in_locus_dict, contigs_in_sample_dict, query_directory, reference_alleles_directory, blast_db_directory, prodigal_directory, blast_results_seq_directory, blast_results_db_directory, inputdir, outputdir, cpus, percentlength, coverage, evalue, perc_identity_ref, perc_identity_loc, reward, penalty, gapopen, gapextend, max_target_seqs, max_hsps, num_threads, flankingnts, schema_variability, schema_statistics, schema_quality, annotation_core_dict, profile_csv_path, logger ):
-    prodigal_report = [] # TEMPORAL. prodigal_report to check the sequences obtained with prodigal vs blast and the sstart and send positions
+def allele_call_nucleotides(
+    core_gene_list_files,
+    sample_list_files,
+    alleles_in_locus_dict,
+    contigs_in_sample_dict,
+    query_directory,
+    reference_alleles_directory,
+    blast_db_directory,
+    prodigal_directory,
+    blast_results_seq_directory,
+    blast_results_db_directory,
+    inputdir,
+    outputdir,
+    cpus,
+    percentlength,
+    coverage,
+    evalue,
+    perc_identity_ref,
+    perc_identity_loc,
+    reward,
+    penalty,
+    gapopen,
+    gapextend,
+    max_target_seqs,
+    max_hsps,
+    num_threads,
+    flankingnts,
+    schema_variability,
+    schema_statistics,
+    schema_quality,
+    annotation_core_dict,
+    profile_csv_path,
+    logger,
+):
+    prodigal_report = (
+        []
+    )  # TEMPORAL. prodigal_report to check the sequences obtained with prodigal vs blast and the sstart and send positions
 
     # lists added to compute the mean coverage of new_sseq relative to the allele, to set a minimum coverage below which to classify as LNF
-    shorter_seq_coverage = [] # TEMPORAL
-    longer_seq_coverage = [] # TEMPORAL
-    equal_seq_coverage = [] # TEMPORAL
+    shorter_seq_coverage = []  # TEMPORAL
+    longer_seq_coverage = []  # TEMPORAL
+    equal_seq_coverage = []  # TEMPORAL
 
     # lists added to compute the mean coverage of sseq relative to the allele after blast, to set a minimum coverage below which to classify as LNF
-    shorter_blast_seq_coverage = [] # TEMPORAL
-    longer_blast_seq_coverage = [] # TEMPORAL
-    equal_blast_seq_coverage = [] # TEMPORAL
+    shorter_blast_seq_coverage = []  # TEMPORAL
+    longer_blast_seq_coverage = []  # TEMPORAL
+    equal_blast_seq_coverage = []  # TEMPORAL
 
     full_gene_list = []
-    samples_matrix_dict = {} # to keep allele number
-    matching_genes_dict = {} # to keep start and stop positions
-    exact_dict = {} # c/m: to keep exact matches found for each sample
-    inferred_alleles_dict = {} # to keep track of the new inferred alleles
-    inf_dict = {} # to keep inferred alleles found for each sample
-    paralog_dict = {} # to keep paralogs found for each sample
-    asm_dict = {} # c/m: to keep track of asm
-    alm_dict = {} # c/m: to keep track of alm
-    list_asm = {} # c/m: to keep asm found for each sample
-    list_alm = {} # c/m: to keep alm found for each sample
-    lnf_tpr_dict = {} # c/m: to keep locus not found for each sample
-    plot_dict = {} # c/m: to keep plots for each sample
-    snp_dict = {} # c/m: to keep snp information for each sample
+    samples_matrix_dict = {}  # to keep allele number
+    matching_genes_dict = {}  # to keep start and stop positions
+    exact_dict = {}  # c/m: to keep exact matches found for each sample
+    inferred_alleles_dict = {}  # to keep track of the new inferred alleles
+    inf_dict = {}  # to keep inferred alleles found for each sample
+    paralog_dict = {}  # to keep paralogs found for each sample
+    asm_dict = {}  # c/m: to keep track of asm
+    alm_dict = {}  # c/m: to keep track of alm
+    list_asm = {}  # c/m: to keep asm found for each sample
+    list_alm = {}  # c/m: to keep alm found for each sample
+    lnf_tpr_dict = {}  # c/m: to keep locus not found for each sample
+    plot_dict = {}  # c/m: to keep plots for each sample
+    snp_dict = {}  # c/m: to keep snp information for each sample
 
     protein_dict = {}
     match_alignment_dict = {}
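Before the per-locus loop in the next hunk, it may help to see the tag cascade that the two threshold pairs drive: length_thresholds() is called once with percentlength (the INF window) and once with coverage (the LNF/ASM/ALM window). A schematic distillation of that branch structure, illustrative only and not a function used by the code:

    def classify_by_length(new_len, min_len, max_len, min_cov, max_cov):
        # min_len/max_len come from percentlength; min_cov/max_cov from coverage
        if min_len <= new_len <= max_len:
            return "INF"  # new inferred allele, within schema length variability
        if min_cov <= new_len < min_len:
            return "ASM"  # shorter than expected, but coverage still acceptable
        if max_len < new_len <= max_cov:
            return "ALM"  # longer than expected, but coverage still acceptable
        return "TPR"      # outside the coverage window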
@@ -1794,71 +2926,138 @@ def allele_call_nucleotides (core_gene_list_files, sample_list_files, alleles_in
 
     blast_parameters = '"6 , qseqid , sseqid , pident , qlen , length , mismatch , gapopen , evalue , bitscore , sstart , send , qstart , qend , sseq , qseq"'
 
-    print('Allele calling starts')
-    pbar = ProgressBar ()
-
+    print("Allele calling starts")
+    pbar = ProgressBar()
 
     ## # # # # # # # # # # # # # # # # # # # # # # # # ##
     ## Processing the search for each schema core gene  ##
     ## # # # # # # # # # # # # # # # # # # # # # # # # ##
-    for core_file in pbar(core_gene_list_files) :
-        core_name = os.path.basename(core_file).split('.')[0]
+    for core_file in pbar(core_gene_list_files):
+        core_name = os.path.basename(core_file).split(".")[0]
         full_gene_list.append(core_name)
-        logger.info('Processing core gene file %s ', core_file)
+        logger.info("Processing core gene file %s ", core_file)
 
         # Get path to this locus fasta file
-        locus_alleles_path = os.path.join(query_directory, str(core_name + '.fasta'))
+        locus_alleles_path = os.path.join(query_directory, str(core_name + ".fasta"))
 
         # Get path to reference allele fasta file for this locus
-        core_reference_allele_path = os.path.join(reference_alleles_directory, core_name + '.fasta')
+        core_reference_allele_path = os.path.join(
+            reference_alleles_directory, core_name + ".fasta"
+        )
 
         # Get length thresholds for INF, ASM and ALM classification
-        max_length_threshold, min_length_threshold = length_thresholds(core_name, schema_statistics, percentlength)
+        max_length_threshold, min_length_threshold = length_thresholds(
+            core_name, schema_statistics, percentlength
+        )
 
         # Get length thresholds for LNF, ASM and ALM classification
-        max_coverage_threshold, min_coverage_threshold = length_thresholds(core_name, schema_statistics, coverage)
+        max_coverage_threshold, min_coverage_threshold = length_thresholds(
+            core_name, schema_statistics, coverage
+        )
 
         ## # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # ##
         ## Processing the search for each schema core gene in each sample  ##
         ## # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # ##
         for sample_file in sample_list_files:
-            logger.info('Processing sample file %s ', sample_file)
+            logger.info("Processing sample file %s ", sample_file)
 
-            sample_name = '.'.join(os.path.basename(sample_file).split('.')[:-1])
+            sample_name = ".".join(os.path.basename(sample_file).split(".")[:-1])
 
             # (tag counts for the plots)
             if sample_name not in count_exact:
-                count_exact[sample_name] = {"good_quality" : 0, "bad_quality" : 0, "no_start" : 0, "no_start_stop" : 0, "no_stop" : 0, "multiple_stop" : 0, "total" : 0}
+                count_exact[sample_name] = {
+                    "good_quality": 0,
+                    "bad_quality": 0,
+                    "no_start": 0,
+                    "no_start_stop": 0,
+                    "no_stop": 0,
+                    "multiple_stop": 0,
+                    "total": 0,
+                }
             if sample_name not in count_inf:
-                count_inf[sample_name] = {"good_quality" : 0, "bad_quality" : 0, "no_start" : 0, "no_start_stop" : 0, "no_stop" : 0, "multiple_stop" : 0, "total" : 0}
+                count_inf[sample_name] = {
+                    "good_quality": 0,
+                    "bad_quality": 0,
+                    "no_start": 0,
+                    "no_start_stop": 0,
+                    "no_stop": 0,
+                    "multiple_stop": 0,
+                    "total": 0,
+                }
             if sample_name not in count_asm:
-                count_asm[sample_name] = {"insertion" : 0, "deletion" : 0, "substitution" : 0, "total" : 0}
+                count_asm[sample_name] = {
+                    "insertion": 0,
+                    "deletion": 0,
+                    "substitution": 0,
+                    "total": 0,
+                }
             if sample_name not in count_alm:
-                count_alm[sample_name] = {"insertion" : 0, "deletion" : 0, "substitution" : 0, "total" : 0}
+                count_alm[sample_name] = {
+                    "insertion": 0,
+                    "deletion": 0,
+                    "substitution": 0,
+                    "total": 0,
+                }
             if sample_name not in count_lnf:
-                count_lnf[sample_name] = {"not_found" : 0, "low_id" : 0, "low_coverage" : 0, "total" : 0}
+                count_lnf[sample_name] = {
+                    "not_found": 0,
+                    "low_id": 0,
+                    "low_coverage": 0,
+                    "total": 0,
+
} if sample_name not in count_tpr: - count_tpr[sample_name] = {"good_quality" : 0, "bad_quality" : 0, "no_start" : 0, "no_start_stop" : 0, "no_stop" : 0, "multiple_stop" : 0, "total" : 0} + count_tpr[sample_name] = { + "good_quality": 0, + "bad_quality": 0, + "no_start": 0, + "no_start_stop": 0, + "no_stop": 0, + "multiple_stop": 0, + "total": 0, + } if sample_name not in count_plot: - count_plot[sample_name] = {"total" : 0} + count_plot[sample_name] = {"total": 0} if sample_name not in count_niph: - count_niph[sample_name] = {"good_quality" : 0, "bad_quality" : 0, "no_start" : 0, "no_start_stop" : 0, "no_stop" : 0, "multiple_stop" : 0, "total" : 0} + count_niph[sample_name] = { + "good_quality": 0, + "bad_quality": 0, + "no_start": 0, + "no_start_stop": 0, + "no_stop": 0, + "multiple_stop": 0, + "total": 0, + } if sample_name not in count_niphem: - count_niphem[sample_name] = {"good_quality" : 0, "bad_quality" : 0, "no_start" : 0, "no_start_stop" : 0, "no_stop" : 0, "multiple_stop" : 0, "total" : 0} + count_niphem[sample_name] = { + "good_quality": 0, + "bad_quality": 0, + "no_start": 0, + "no_start_stop": 0, + "no_stop": 0, + "multiple_stop": 0, + "total": 0, + } if sample_name not in count_error: - count_error[sample_name] = {"good_quality" : 0, "bad_quality" : 0, "no_start" : 0, "no_start_stop" : 0, "no_stop" : 0, "multiple_stop" : 0, "total" : 0} - + count_error[sample_name] = { + "good_quality": 0, + "bad_quality": 0, + "no_start": 0, + "no_start_stop": 0, + "no_stop": 0, + "multiple_stop": 0, + "total": 0, + } # Initialize the sample list to add the number of alleles and the start, stop positions if not sample_name in samples_matrix_dict: @@ -1872,7 +3071,20 @@ def allele_call_nucleotides (core_gene_list_files, sample_list_files, alleles_in # Sample contigs VS reference allele(s) BLAST for locus detection in sample # # * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * # - cline = NcbiblastnCommandline(db=blast_db_name, evalue=evalue, perc_identity=perc_identity_ref, reward=reward, penalty=penalty, gapopen=gapopen, gapextend=gapextend, outfmt=blast_parameters, max_target_seqs=max_target_seqs, max_hsps=max_hsps, num_threads=num_threads, query=core_reference_allele_path) + cline = NcbiblastnCommandline( + db=blast_db_name, + evalue=evalue, + perc_identity=perc_identity_ref, + reward=reward, + penalty=penalty, + gapopen=gapopen, + gapextend=gapextend, + outfmt=blast_parameters, + max_target_seqs=max_target_seqs, + max_hsps=max_hsps, + num_threads=num_threads, + query=core_reference_allele_path, + ) out, err = cline() out_lines = out.splitlines() @@ -1881,49 +3093,113 @@ def allele_call_nucleotides (core_gene_list_files, sample_list_files, alleles_in # ······························································ # # LNF if there are no BLAST results for this gene in this sample # # ······························································ # - if len (out_lines) == 0: - + if len(out_lines) == 0: # Trying to get the allele number to avoid that a bad quality assembly impact on the tree diagram - cline = NcbiblastnCommandline(db=blast_db_name, evalue=evalue, perc_identity = 70, reward=reward, penalty=penalty, gapopen=gapopen, gapextend=gapextend, outfmt=blast_parameters, max_target_seqs=1, max_hsps=1, num_threads=1, query=core_reference_allele_path) + cline = NcbiblastnCommandline( + db=blast_db_name, + evalue=evalue, + perc_identity=70, + reward=reward, + penalty=penalty, + gapopen=gapopen, + gapextend=gapextend, + outfmt=blast_parameters, + max_target_seqs=1, 
+ max_hsps=1, + num_threads=1, + query=core_reference_allele_path, + ) out, err = cline() out_lines = out.splitlines() - if len (out_lines) > 0 : - - for line in out_lines : - values = line.split('\t') - if float(values[8]) > bigger_bitscore: - qseqid , sseqid , pident , qlen , s_length , mismatch , r_gapopen , r_evalue , bitscore , sstart , send , qstart , qend ,sseq , qseq = values + if len(out_lines) > 0: + for line in out_lines: + values = line.split("\t") + if float(values[8]) > bigger_bitscore: + ( + qseqid, + sseqid, + pident, + qlen, + s_length, + mismatch, + r_gapopen, + r_evalue, + bitscore, + sstart, + send, + qstart, + qend, + sseq, + qseq, + ) = values bigger_bitscore = float(bitscore) # Keep LNF info - lnf_tpr_tag(core_name, sample_name, alleles_in_locus_dict, samples_matrix_dict, lnf_tpr_dict, schema_statistics, locus_alleles_path, qseqid, pident, '-', '-', perc_identity_ref, '-', schema_quality, annotation_core_dict, count_lnf, logger) + lnf_tpr_tag( + core_name, + sample_name, + alleles_in_locus_dict, + samples_matrix_dict, + lnf_tpr_dict, + schema_statistics, + locus_alleles_path, + qseqid, + pident, + "-", + "-", + perc_identity_ref, + "-", + schema_quality, + annotation_core_dict, + count_lnf, + logger, + ) else: # Keep LNF info - lnf_tpr_tag(core_name, sample_name, '-', samples_matrix_dict, lnf_tpr_dict, schema_statistics, locus_alleles_path, '-', '-', '-', '-', '-', '-', schema_quality, annotation_core_dict, count_lnf, logger) + lnf_tpr_tag( + core_name, + sample_name, + "-", + samples_matrix_dict, + lnf_tpr_dict, + schema_statistics, + locus_alleles_path, + "-", + "-", + "-", + "-", + "-", + "-", + schema_quality, + annotation_core_dict, + count_lnf, + logger, + ) continue ## Continue classification process if the core gene has been detected in sample after BLAST search - if len (out_lines) > 0: - + if len(out_lines) > 0: # Parse contigs for this sample - #contig_file = os.path.join(inputdir, sample_name + ".fasta") ## parse - #records = list(SeqIO.parse(contig_file, "fasta")) ## parse + # contig_file = os.path.join(inputdir, sample_name + ".fasta") ## parse + # records = list(SeqIO.parse(contig_file, "fasta")) ## parse ## Keep BLAST results after locus detection in sample using reference allele # Path to BLAST results fasta file - path_to_blast_seq = os.path.join(blast_results_seq_directory, sample_name, core_name + "_blast.fasta") + path_to_blast_seq = os.path.join( + blast_results_seq_directory, sample_name, core_name + "_blast.fasta" + ) - with open (path_to_blast_seq, 'w') as outblast_fh: + with open(path_to_blast_seq, "w") as outblast_fh: seq_number = 1 - for line in out_lines : - values = line.split('\t') + for line in out_lines: + values = line.split("\t") qseqid = values[0] if values[1] not in contigs_in_sample_dict[sample_name]: - sseqid = '|'.join(values[1].split('|')[1:-1]) + sseqid = "|".join(values[1].split("|")[1:-1]) else: sseqid = values[1] sstart = values[9] @@ -1932,10 +3208,10 @@ def allele_call_nucleotides (core_gene_list_files, sample_list_files, alleles_in # Get flanked BLAST sequences from contig for correct allele tagging accession_sequence = contigs_in_sample_dict[sample_name][sseqid] - #for record in records: ## parse - #if record.id == sseqid : ## parse - #break ## parse - #accession_sequence = str(record.seq) ## parse + # for record in records: ## parse + # if record.id == sseqid : ## parse + # break ## parse + # accession_sequence = str(record.seq) ## parse if int(send) > int(sstart): max_index = int(send) @@ -1946,94 +3222,188 @@ def 
allele_call_nucleotides (core_gene_list_files, sample_list_files, alleles_in if (flankingnts + 1) <= min_index: if flankingnts <= (len(accession_sequence) - max_index): - flanked_sseq = accession_sequence[ min_index -1 -flankingnts : max_index + flankingnts ] + flanked_sseq = accession_sequence[ + min_index + - 1 + - flankingnts : max_index + + flankingnts + ] else: - flanked_sseq = accession_sequence[ min_index -1 -flankingnts : ] + flanked_sseq = accession_sequence[ + min_index - 1 - flankingnts : + ] else: - flanked_sseq = accession_sequence[ : max_index + flankingnts ] - - seq_id = str(seq_number) + '_' + sseqid - outblast_fh.write('>' + seq_id + ' # ' + ' # '.join(values[0:13]) + '\n' + flanked_sseq + '\n' + '\n' ) + flanked_sseq = accession_sequence[: max_index + flankingnts] + + seq_id = str(seq_number) + "_" + sseqid + outblast_fh.write( + ">" + + seq_id + + " # " + + " # ".join(values[0:13]) + + "\n" + + flanked_sseq + + "\n" + + "\n" + ) seq_number += 1 ## Create local BLAST database for BLAST results after locus detection in sample using reference allele db_name = os.path.join(blast_results_db_directory, sample_name) - if not create_blastdb(path_to_blast_seq, db_name, 'nucl', logger): - print('Error when creating the blastdb for blast results file for locus %s at sample %s. Check log file for more information. \n ', core_name, sample_name) + if not create_blastdb(path_to_blast_seq, db_name, "nucl", logger): + print( + "Error when creating the blastdb for blast results file for locus %s at sample %s. Check log file for more information. \n ", + core_name, + sample_name, + ) return False # Path to local BLAST database for BLAST results after locus detection in sample using reference allele - locus_blast_db_name = os.path.join(blast_results_db_directory, sample_name, os.path.basename(core_name) + '_blast', os.path.basename(core_name) + '_blast') - + locus_blast_db_name = os.path.join( + blast_results_db_directory, + sample_name, + os.path.basename(core_name) + "_blast", + os.path.basename(core_name) + "_blast", + ) # * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * # # BLAST result sequences VS ALL alleles in locus BLAST for allele identification detection in sample # # * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * # - cline = NcbiblastnCommandline(db=locus_blast_db_name, evalue=evalue, perc_identity=perc_identity_loc, reward=reward, penalty=penalty, gapopen=gapopen, gapextend=gapextend, outfmt = blast_parameters, max_target_seqs=max_target_seqs, max_hsps=max_hsps, num_threads=num_threads, query=locus_alleles_path) + cline = NcbiblastnCommandline( + db=locus_blast_db_name, + evalue=evalue, + perc_identity=perc_identity_loc, + reward=reward, + penalty=penalty, + gapopen=gapopen, + gapextend=gapextend, + outfmt=blast_parameters, + max_target_seqs=max_target_seqs, + max_hsps=max_hsps, + num_threads=num_threads, + query=locus_alleles_path, + ) out, err = cline() out_lines = out.splitlines() - allele_found = {} # To keep filtered BLAST results + allele_found = {} # To keep filtered BLAST results ## Check if there is any BLAST result with ID = 100 ## for line in out_lines: - - values = line.split('\t') + values = line.split("\t") pident = values[2] if float(pident) == 100: - - qseqid, sseqid, pident, qlen, s_length, mismatch, r_gapopen, r_evalue, bitscore, sstart, send, qstart, qend, sseq, qseq = values + ( + qseqid, + sseqid, + pident, + qlen, + s_length, + mismatch, + r_gapopen, + 
r_evalue, + bitscore, + sstart, + send, + qstart, + qend, + sseq, + qseq, + ) = values # Parse core gene fasta file to get matching allele sequence and length - #alleles_in_locus = list (SeqIO.parse(locus_alleles_path, "fasta")) ## parse - #for allele in alleles_in_locus : ## parse - #if allele.id == qseqid : ## parse - #break ## comentar parse - #matching_allele_seq = str(allele.seq) ## parse - #matching_allele_length = len(matching_allele_seq) ## parse + # alleles_in_locus = list (SeqIO.parse(locus_alleles_path, "fasta")) ## parse + # for allele in alleles_in_locus : ## parse + # if allele.id == qseqid : ## parse + # break ## comentar parse + # matching_allele_seq = str(allele.seq) ## parse + # matching_allele_length = len(matching_allele_seq) ## parse matching_allele_seq = alleles_in_locus_dict[core_name][qseqid] matching_allele_length = len(matching_allele_seq) # Keep BLAST results with ID = 100 and same length as matching allele if int(s_length) == matching_allele_length: - #get_blast_results (values, records, allele_found, logger) - get_blast_results (sample_name, values, contigs_in_sample_dict, allele_found, logger) + # get_blast_results (values, records, allele_found, logger) + get_blast_results( + sample_name, + values, + contigs_in_sample_dict, + allele_found, + logger, + ) # ·································································································································· # # NIPHEM (paralog) if there are multiple BLAST results with ID = 100 and same length as matching allele for this gene in this sample # # ·································································································································· # if len(allele_found) > 1: - # Keep NIPHEM info - paralog_exact_tag(sample_name, core_name, 'NIPHEM', schema_quality, matching_genes_dict, samples_matrix_dict, allele_found, paralog_dict, prodigal_report, prodigal_directory, blast_parameters, annotation_core_dict, count_niphem, logger) + paralog_exact_tag( + sample_name, + core_name, + "NIPHEM", + schema_quality, + matching_genes_dict, + samples_matrix_dict, + allele_found, + paralog_dict, + prodigal_report, + prodigal_directory, + blast_parameters, + annotation_core_dict, + count_niphem, + logger, + ) continue ## Check for possible paralogs with ID < 100 if there is only one BLAST result with ID = 100 and same length as matching allele - elif len(allele_found) == 1 : + elif len(allele_found) == 1: + for line in out_lines: + values = line.split("\t") - for line in out_lines : - values = line.split('\t') - - sseq_no_gaps = values[13].replace('-', '') + sseq_no_gaps = values[13].replace("-", "") s_length_no_gaps = len(sseq_no_gaps) # Keep BLAST result if its coverage is within min and max thresholds - if min_length_threshold <= s_length_no_gaps <= max_length_threshold: - #get_blast_results (values, records, allele_found, logger) - get_blast_results (sample_name, values, contigs_in_sample_dict, allele_found, logger) + if ( + min_length_threshold + <= s_length_no_gaps + <= max_length_threshold + ): + # get_blast_results (values, records, allele_found, logger) + get_blast_results( + sample_name, + values, + contigs_in_sample_dict, + allele_found, + logger, + ) # ································································ # # EXACT MATCH if there is any paralog for this gene in this sample # # ································································ # - if len(allele_found) == 1 : - - paralog_exact_tag(sample_name, core_name, 'EXACT', schema_quality, 
matching_genes_dict, samples_matrix_dict, allele_found, exact_dict, prodigal_report, prodigal_directory, blast_parameters, annotation_core_dict, count_exact, logger) + if len(allele_found) == 1: + paralog_exact_tag( + sample_name, + core_name, + "EXACT", + schema_quality, + matching_genes_dict, + samples_matrix_dict, + allele_found, + exact_dict, + prodigal_report, + prodigal_directory, + blast_parameters, + annotation_core_dict, + count_exact, + logger, + ) continue @@ -2041,111 +3411,267 @@ def allele_call_nucleotides (core_gene_list_files, sample_list_files, alleles_in # NIPH if there there are paralogs with ID < 100 for this gene in this sample # # ··········································································· # else: - - paralog_exact_tag(sample_name, core_name, 'NIPH', schema_quality, matching_genes_dict, samples_matrix_dict, allele_found, paralog_dict, prodigal_report, prodigal_directory, blast_parameters, annotation_core_dict, count_niph, logger) + paralog_exact_tag( + sample_name, + core_name, + "NIPH", + schema_quality, + matching_genes_dict, + samples_matrix_dict, + allele_found, + paralog_dict, + prodigal_report, + prodigal_directory, + blast_parameters, + annotation_core_dict, + count_niph, + logger, + ) continue ## Look for the best BLAST result if there are no results with ID = 100 ## elif len(allele_found) == 0: - bigger_bitscore_seq_values = [] - for line in out_lines : - values = line.split('\t') + for line in out_lines: + values = line.split("\t") - if float(values[8]) > bigger_bitscore: - s_length_no_gaps = len(values[13].replace('-', '')) + if float(values[8]) > bigger_bitscore: + s_length_no_gaps = len(values[13].replace("-", "")) # Keep BLAST result if its coverage is within min and max thresholds and its bitscore is bigger than the one previously kept - if min_coverage_threshold <= s_length_no_gaps <= max_coverage_threshold: + if ( + min_coverage_threshold + <= s_length_no_gaps + <= max_coverage_threshold + ): bigger_bitscore_seq_values = values bigger_bitscore = float(bigger_bitscore_seq_values[8]) ## Check if best BLAST result out of coverage thresholds is a possible PLOT or LNF due to low coverage ## - #if len(allele_found) == 0: + # if len(allele_found) == 0: if len(bigger_bitscore_seq_values) == 0: - # Look for best bitscore BLAST result out of coverage thresholds to check possible PLOT or reporting LNF due to low coverage - for line in out_lines : - values = line.split('\t') - - if float(values[8]) > bigger_bitscore: - qseqid, sseqid, pident, qlen, s_length, mismatch, r_gapopen, r_evalue, bitscore, sstart, send, qstart, qend, sseq, qseq = values - bigger_bitscore_seq_values_out_cov = values ### + for line in out_lines: + values = line.split("\t") + + if float(values[8]) > bigger_bitscore: + ( + qseqid, + sseqid, + pident, + qlen, + s_length, + mismatch, + r_gapopen, + r_evalue, + bitscore, + sstart, + send, + qstart, + qend, + sseq, + qseq, + ) = values + bigger_bitscore_seq_values_out_cov = values ### bigger_bitscore = float(bitscore) # Get BLAST values relatives to contig for bigger bitscore result - lnf_plot_found = {} ### - - get_blast_results (sample_name, bigger_bitscore_seq_values_out_cov, contigs_in_sample_dict, lnf_plot_found, logger) ### - - allele_id = str(list(lnf_plot_found.keys())[0]) ### - qseqid, sseqid, pident, qlen, s_length, mismatch, r_gapopen, r_evalue, bitscore, sstart, send, qstart, qend, sseq, qseq = lnf_plot_found[allele_id] + lnf_plot_found = {} ### + + get_blast_results( + sample_name, + 
bigger_bitscore_seq_values_out_cov, + contigs_in_sample_dict, + lnf_plot_found, + logger, + ) ### + + allele_id = str(list(lnf_plot_found.keys())[0]) ### + ( + qseqid, + sseqid, + pident, + qlen, + s_length, + mismatch, + r_gapopen, + r_evalue, + bitscore, + sstart, + send, + qstart, + qend, + sseq, + qseq, + ) = lnf_plot_found[allele_id] # Get contig sequence and length for best bitscore BLAST result ID - #for record in records: ## parse - #if record.id == sseqid : ## parse - #break ## parse - #accession_sequence = record.seq ## parse - #length_sseqid = len(accession_sequence) ## parse + # for record in records: ## parse + # if record.id == sseqid : ## parse + # break ## parse + # accession_sequence = record.seq ## parse + # length_sseqid = len(accession_sequence) ## parse accession_sequence = contigs_in_sample_dict[sample_name][sseqid] length_sseqid = len(accession_sequence) # Check if best BLAST result out of coverage thresholds is a possible PLOT. If so, keep result info for later PLOT classification - if int(sstart) == length_sseqid or int(send) == length_sseqid or int(sstart) == 1 or int(send) == 1: - bigger_bitscore_seq_values = bigger_bitscore_seq_values_out_cov ### + if ( + int(sstart) == length_sseqid + or int(send) == length_sseqid + or int(sstart) == 1 + or int(send) == 1 + ): + bigger_bitscore_seq_values = ( + bigger_bitscore_seq_values_out_cov ### + ) # ·············································································································································· # # LNF if there are no BLAST results within coverage thresholds for this gene in this sample and best out threshold result is not a possible PLOT # # ·············································································································································· # else: # Get sequence length - s_length_no_gaps = len(bigger_bitscore_seq_values_out_cov[13].replace('-', '')) + s_length_no_gaps = len( + bigger_bitscore_seq_values_out_cov[13].replace("-", "") + ) # Keep LNF info - lnf_tpr_tag(core_name, sample_name, alleles_in_locus_dict, samples_matrix_dict, lnf_tpr_dict, schema_statistics, locus_alleles_path, qseqid, pident, s_length_no_gaps, '-', '-', coverage, schema_quality, annotation_core_dict, count_lnf, logger) + lnf_tpr_tag( + core_name, + sample_name, + alleles_in_locus_dict, + samples_matrix_dict, + lnf_tpr_dict, + schema_statistics, + locus_alleles_path, + qseqid, + pident, + s_length_no_gaps, + "-", + "-", + coverage, + schema_quality, + annotation_core_dict, + count_lnf, + logger, + ) ## Keep result with bigger bitscore in allele_found dict and look for possible paralogs ## if len(bigger_bitscore_seq_values) > 0: - - qseqid, sseqid, pident, qlen, s_length, mismatch, r_gapopen, r_evalue, bitscore, sstart, send, qstart, qend, sseq, qseq = bigger_bitscore_seq_values - - #get_blast_results (bigger_bitscore_seq_values, records, allele_found, logger) - get_blast_results (sample_name, bigger_bitscore_seq_values, contigs_in_sample_dict, allele_found, logger) + ( + qseqid, + sseqid, + pident, + qlen, + s_length, + mismatch, + r_gapopen, + r_evalue, + bitscore, + sstart, + send, + qstart, + qend, + sseq, + qseq, + ) = bigger_bitscore_seq_values + + # get_blast_results (bigger_bitscore_seq_values, records, allele_found, logger) + get_blast_results( + sample_name, + bigger_bitscore_seq_values, + contigs_in_sample_dict, + allele_found, + logger, + ) # Possible paralogs search - for line in out_lines : - values = line.split('\t') - - qseqid, sseqid, pident, 
qlen, s_length, mismatch, r_gapopen, r_evalue, bitscore, sstart, send, qstart, qend, sseq, qseq = values - sseq_no_gaps = sseq.replace('-', '') + for line in out_lines: + values = line.split("\t") + + ( + qseqid, + sseqid, + pident, + qlen, + s_length, + mismatch, + r_gapopen, + r_evalue, + bitscore, + sstart, + send, + qstart, + qend, + sseq, + qseq, + ) = values + sseq_no_gaps = sseq.replace("-", "") s_length_no_gaps = len(sseq_no_gaps) - if min_length_threshold <= s_length_no_gaps <= max_length_threshold: - - #get_blast_results (values, records, allele_found, logger) - get_blast_results (sample_name, values, contigs_in_sample_dict, allele_found, logger) + if ( + min_length_threshold + <= s_length_no_gaps + <= max_length_threshold + ): + # get_blast_results (values, records, allele_found, logger) + get_blast_results( + sample_name, + values, + contigs_in_sample_dict, + allele_found, + logger, + ) # ····························································· # # NIPH if there there are paralogs for this gene in this sample # # ····························································· # - if len(allele_found) > 1 : - - paralog_exact_tag(sample_name, core_name, 'NIPH', schema_quality, matching_genes_dict, samples_matrix_dict, allele_found, paralog_dict, prodigal_report, prodigal_directory, blast_parameters, annotation_core_dict, count_niph, logger) + if len(allele_found) > 1: + paralog_exact_tag( + sample_name, + core_name, + "NIPH", + schema_quality, + matching_genes_dict, + samples_matrix_dict, + allele_found, + paralog_dict, + prodigal_report, + prodigal_directory, + blast_parameters, + annotation_core_dict, + count_niph, + logger, + ) continue ## Continue classification if there are no paralogs ## - elif len(allele_found) == 1 : - + elif len(allele_found) == 1: allele_id = str(list(allele_found.keys())[0]) - qseqid, sseqid, pident, qlen, s_length, mismatch, r_gapopen, r_evalue, bitscore, sstart, send, qstart, qend, sseq, qseq = allele_found[allele_id] - - sseq_no_gaps = sseq.replace('-', '') + ( + qseqid, + sseqid, + pident, + qlen, + s_length, + mismatch, + r_gapopen, + r_evalue, + bitscore, + sstart, + send, + qstart, + qend, + sseq, + qseq, + ) = allele_found[allele_id] + + sseq_no_gaps = sseq.replace("-", "") s_length_no_gaps = len(sseq_no_gaps) # Get matching allele quality @@ -2153,48 +3679,99 @@ def allele_call_nucleotides (core_gene_list_files, sample_list_files, alleles_in # Get matching allele sequence and length - #alleles_in_locus = list (SeqIO.parse(locus_alleles_path, "fasta")) ## parse - #for allele in alleles_in_locus : ## parse - #if allele.id == qseqid : ## parse - #break ## parse - #matching_allele_seq = allele.seq ## parse - #matching_allele_length = len(matching_allele_seq) ## parse + # alleles_in_locus = list (SeqIO.parse(locus_alleles_path, "fasta")) ## parse + # for allele in alleles_in_locus : ## parse + # if allele.id == qseqid : ## parse + # break ## parse + # matching_allele_seq = allele.seq ## parse + # matching_allele_length = len(matching_allele_seq) ## parse - matching_allele_seq = alleles_in_locus_dict [core_name][qseqid] + matching_allele_seq = alleles_in_locus_dict[core_name][ + qseqid + ] matching_allele_length = len(matching_allele_seq) # Get contig sequence and length for ID found in BLAST - #for record in records: ## parse - #if record.id == sseqid : ## parse - #break ## parse - #accession_sequence = record.seq ## parse - #length_sseqid = len(accession_sequence) ## parse + # for record in records: ## parse + # if record.id == sseqid : ## 
parse + # break ## parse + # accession_sequence = record.seq ## parse + # length_sseqid = len(accession_sequence) ## parse - accession_sequence = contigs_in_sample_dict[sample_name][sseqid] + accession_sequence = contigs_in_sample_dict[sample_name][ + sseqid + ] length_sseqid = len(accession_sequence) # ········································································································· # # PLOT if found sequence is shorter than matching allele and it is located on the edge of the sample contig # # ········································································································· # - if int(sstart) == length_sseqid or int(send) == length_sseqid or int(sstart) == 1 or int(send) == 1: + if ( + int(sstart) == length_sseqid + or int(send) == length_sseqid + or int(sstart) == 1 + or int(send) == 1 + ): if int(s_length) < matching_allele_length: - ### sacar sec prodigal para PLOT? # Get prodigal predicted sequence if matching allele quality is "bad quality" - if 'bad_quality' in allele_quality: - complete_predicted_seq, start_prodigal, end_prodigal = get_prodigal_sequence(sseq_no_gaps, sseqid, prodigal_directory, sample_name, blast_parameters, logger) + if "bad_quality" in allele_quality: + ( + complete_predicted_seq, + start_prodigal, + end_prodigal, + ) = get_prodigal_sequence( + sseq_no_gaps, + sseqid, + prodigal_directory, + sample_name, + blast_parameters, + logger, + ) # Keep info for prodigal report - prodigal_report.append([core_name, sample_name, qseqid, 'PLOT', sstart, send, start_prodigal, end_prodigal, sseq_no_gaps, complete_predicted_seq]) + prodigal_report.append( + [ + core_name, + sample_name, + qseqid, + "PLOT", + sstart, + send, + start_prodigal, + end_prodigal, + sseq_no_gaps, + complete_predicted_seq, + ] + ) else: - complete_predicted_seq = '-' - start_prodigal = '-' - end_prodigal = '-' + complete_predicted_seq = "-" + start_prodigal = "-" + end_prodigal = "-" # Keep PLOT info - inf_asm_alm_tag(core_name, sample_name, 'PLOT', allele_found[allele_id], allele_quality, '-', matching_allele_length, '-', plot_dict, samples_matrix_dict, matching_genes_dict, prodigal_report, start_prodigal, end_prodigal, complete_predicted_seq, annotation_core_dict, count_plot, logger) + inf_asm_alm_tag( + core_name, + sample_name, + "PLOT", + allele_found[allele_id], + allele_quality, + "-", + matching_allele_length, + "-", + plot_dict, + samples_matrix_dict, + matching_genes_dict, + prodigal_report, + start_prodigal, + end_prodigal, + complete_predicted_seq, + annotation_core_dict, + count_plot, + logger, + ) continue @@ -2203,121 +3780,345 @@ def allele_call_nucleotides (core_gene_list_files, sample_list_files, alleles_in # * * * * * * * * * * * * * * * * * * * * # ## Get Prodigal predicted sequence ## - complete_predicted_seq, start_prodigal, end_prodigal = get_prodigal_sequence(sseq_no_gaps, sseqid, prodigal_directory, sample_name, blast_parameters, logger) + ( + complete_predicted_seq, + start_prodigal, + end_prodigal, + ) = get_prodigal_sequence( + sseq_no_gaps, + sseqid, + prodigal_directory, + sample_name, + blast_parameters, + logger, + ) ## Search for new codon stop using contig sequence info ## # Check matching allele sequence direction - query_direction = check_sequence_order(matching_allele_seq, logger) + query_direction = check_sequence_order( + matching_allele_seq, logger + ) # Get extended BLAST sequence for stop codon search - if query_direction == 'reverse': - if int(send) > int (sstart): - sample_gene_sequence = accession_sequence[ : 
int(send) ] - sample_gene_sequence = str(Seq.Seq(sample_gene_sequence).reverse_complement()) + if query_direction == "reverse": + if int(send) > int(sstart): + sample_gene_sequence = accession_sequence[ + : int(send) + ] + sample_gene_sequence = str( + Seq.Seq( + sample_gene_sequence + ).reverse_complement() + ) else: - sample_gene_sequence = accession_sequence[ int(send) -1 : ] + sample_gene_sequence = accession_sequence[ + int(send) - 1 : + ] else: - if int(sstart) > int (send): - sample_gene_sequence = accession_sequence[ : int(sstart) ] - sample_gene_sequence = str(Seq.Seq(sample_gene_sequence).reverse_complement()) + if int(sstart) > int(send): + sample_gene_sequence = accession_sequence[ + : int(sstart) + ] + sample_gene_sequence = str( + Seq.Seq( + sample_gene_sequence + ).reverse_complement() + ) else: - sample_gene_sequence = accession_sequence[ int(sstart) -1 : ] + sample_gene_sequence = accession_sequence[ + int(sstart) - 1 : + ] # Get new stop codon index stop_index = get_stop_codon_index(sample_gene_sequence) ## Classification of final new sequence if it is found ## if stop_index != False: - new_sequence_length = stop_index +3 - new_sseq = str(sample_gene_sequence[0:new_sequence_length]) + new_sequence_length = stop_index + 3 + new_sseq = str( + sample_gene_sequence[0:new_sequence_length] + ) ######################################################################################################################### ### c/m: introducido para determinar qué umbral de coverage poner. TEMPORAL - new_sseq_coverage = new_sequence_length/matching_allele_length ### introduciendo coverage new_sseq /// debería ser con respecto a la media? + new_sseq_coverage = ( + new_sequence_length / matching_allele_length + ) ### introduciendo coverage new_sseq /// debería ser con respecto a la media? 
if new_sseq_coverage < 1: - shorter_seq_coverage.append([core_name, sample_name, str(matching_allele_length), str(new_sequence_length), str(schema_statistics[core_name][0]), str(new_sseq_coverage), str(new_sequence_length/schema_statistics[core_name][0])]) + shorter_seq_coverage.append( + [ + core_name, + sample_name, + str(matching_allele_length), + str(new_sequence_length), + str(schema_statistics[core_name][0]), + str(new_sseq_coverage), + str( + new_sequence_length + / schema_statistics[core_name][0] + ), + ] + ) elif new_sseq_coverage > 1: - longer_seq_coverage.append([core_name, sample_name, str(matching_allele_length), str(new_sequence_length), str(schema_statistics[core_name][0]), str(new_sseq_coverage), str(new_sequence_length/schema_statistics[core_name][0])]) + longer_seq_coverage.append( + [ + core_name, + sample_name, + str(matching_allele_length), + str(new_sequence_length), + str(schema_statistics[core_name][0]), + str(new_sseq_coverage), + str( + new_sequence_length + / schema_statistics[core_name][0] + ), + ] + ) elif new_sseq_coverage == 1: - equal_seq_coverage.append([core_name, sample_name, str(matching_allele_length), str(new_sequence_length), str(schema_statistics[core_name][0]), str(new_sseq_coverage), str(new_sequence_length/schema_statistics[core_name][0])]) + equal_seq_coverage.append( + [ + core_name, + sample_name, + str(matching_allele_length), + str(new_sequence_length), + str(schema_statistics[core_name][0]), + str(new_sseq_coverage), + str( + new_sequence_length + / schema_statistics[core_name][0] + ), + ] + ) ######################################################################################################################### # Get and keep SNP and DNA and protein alignment - keep_snp_alignment_info(sseq, new_sseq, matching_allele_seq, qseqid, query_direction, core_name, sample_name, reward, penalty, gapopen, gapextend, snp_dict, match_alignment_dict, protein_dict, logger) + keep_snp_alignment_info( + sseq, + new_sseq, + matching_allele_seq, + qseqid, + query_direction, + core_name, + sample_name, + reward, + penalty, + gapopen, + gapextend, + snp_dict, + match_alignment_dict, + protein_dict, + logger, + ) # ····································································································· # # INF if final new sequence length is within min and max length thresholds for this gene in this sample # # ····································································································· # - if min_length_threshold <= new_sequence_length <= max_length_threshold: - + if ( + min_length_threshold + <= new_sequence_length + <= max_length_threshold + ): # Keep INF info - inf_asm_alm_tag(core_name, sample_name, 'INF', allele_found[allele_id], allele_quality, new_sseq, matching_allele_length, inferred_alleles_dict, inf_dict, samples_matrix_dict, matching_genes_dict, prodigal_report, start_prodigal, end_prodigal, complete_predicted_seq, annotation_core_dict, count_inf, logger) ### introducido start_prodigal, end_prodigal, complete_predicted_seq, prodigal_report como argumento a inf_asm_alm_tag para report prodigal, temporal + inf_asm_alm_tag( + core_name, + sample_name, + "INF", + allele_found[allele_id], + allele_quality, + new_sseq, + matching_allele_length, + inferred_alleles_dict, + inf_dict, + samples_matrix_dict, + matching_genes_dict, + prodigal_report, + start_prodigal, + end_prodigal, + complete_predicted_seq, + annotation_core_dict, + count_inf, + logger, + ) ### introducido start_prodigal, end_prodigal, 
complete_predicted_seq, prodigal_report como argumento a inf_asm_alm_tag para report prodigal, temporal # ············································································································································ # # ASM if final new sequence length is under min length threshold but its coverage is above min coverage threshold for this gene in this sample # # ············································································································································ # - elif min_coverage_threshold <= new_sequence_length < min_length_threshold: - + elif ( + min_coverage_threshold + <= new_sequence_length + < min_length_threshold + ): # Keep ASM info - inf_asm_alm_tag(core_name, sample_name, 'ASM', allele_found[allele_id], allele_quality, new_sseq, matching_allele_length, asm_dict, list_asm, samples_matrix_dict, matching_genes_dict, prodigal_report, start_prodigal, end_prodigal, complete_predicted_seq, annotation_core_dict, count_asm, logger) + inf_asm_alm_tag( + core_name, + sample_name, + "ASM", + allele_found[allele_id], + allele_quality, + new_sseq, + matching_allele_length, + asm_dict, + list_asm, + samples_matrix_dict, + matching_genes_dict, + prodigal_report, + start_prodigal, + end_prodigal, + complete_predicted_seq, + annotation_core_dict, + count_asm, + logger, + ) # ············································································································································ # # ALM if final new sequence length is above max length threshold but its coverage is under max coverage threshold for this gene in this sample # # ············································································································································ # - elif max_length_threshold < new_sequence_length <= max_coverage_threshold: - + elif ( + max_length_threshold + < new_sequence_length + <= max_coverage_threshold + ): # Keep ALM info - inf_asm_alm_tag(core_name, sample_name, 'ALM', allele_found[allele_id], allele_quality, new_sseq, matching_allele_length, alm_dict, list_alm, samples_matrix_dict, matching_genes_dict, prodigal_report, start_prodigal, end_prodigal, complete_predicted_seq, annotation_core_dict, count_alm, logger) ### introducido start_prodigal, end_prodigal, complete_predicted_seq, prodigal_report como argumento a inf_asm_alm_tag para report prodigal, temporal + inf_asm_alm_tag( + core_name, + sample_name, + "ALM", + allele_found[allele_id], + allele_quality, + new_sseq, + matching_allele_length, + alm_dict, + list_alm, + samples_matrix_dict, + matching_genes_dict, + prodigal_report, + start_prodigal, + end_prodigal, + complete_predicted_seq, + annotation_core_dict, + count_alm, + logger, + ) ### introducido start_prodigal, end_prodigal, complete_predicted_seq, prodigal_report como argumento a inf_asm_alm_tag para report prodigal, temporal # ························································································· # # TPR if final new sequence coverage is not within thresholds for this gene in this sample # # ························································································· # else: - # Keep TPR info - lnf_tpr_tag(core_name, sample_name, alleles_in_locus_dict, samples_matrix_dict, lnf_tpr_dict, schema_statistics, locus_alleles_path, qseqid, pident, s_length_no_gaps, new_sequence_length, '-', coverage, schema_quality, annotation_core_dict, count_tpr, logger) + lnf_tpr_tag( + core_name, + sample_name, + alleles_in_locus_dict, + 
samples_matrix_dict, + lnf_tpr_dict, + schema_statistics, + locus_alleles_path, + qseqid, + pident, + s_length_no_gaps, + new_sequence_length, + "-", + coverage, + schema_quality, + annotation_core_dict, + count_tpr, + logger, + ) # ········································ # # ERROR if final new sequence is not found # # ········································ # else: - logger.error('ERROR : Stop codon was not found for the core %s and the sample %s', core_name, sample_name) - samples_matrix_dict[sample_name].append('ERROR not stop codon') - if not sseqid in matching_genes_dict[sample_name] : + logger.error( + "ERROR : Stop codon was not found for the core %s and the sample %s", + core_name, + sample_name, + ) + samples_matrix_dict[sample_name].append( + "ERROR not stop codon" + ) + if not sseqid in matching_genes_dict[sample_name]: matching_genes_dict[sample_name][sseqid] = [] - if sstart > send : - #matching_genes_dict[sample_name][sseqid].append([core_name, sstart, send,'-', 'ERROR']) - matching_genes_dict[sample_name][sseqid].append([core_name, qseqid, sstart, send,'-', 'ERROR']) + if sstart > send: + # matching_genes_dict[sample_name][sseqid].append([core_name, sstart, send,'-', 'ERROR']) + matching_genes_dict[sample_name][sseqid].append( + [core_name, qseqid, sstart, send, "-", "ERROR"] + ) else: - #matching_genes_dict[sample_name][sseqid].append([core_name, sstart,send,'+', 'ERROR']) - matching_genes_dict[sample_name][sseqid].append([core_name, qseqid, sstart, send,'+', 'ERROR']) + # matching_genes_dict[sample_name][sseqid].append([core_name, sstart,send,'+', 'ERROR']) + matching_genes_dict[sample_name][sseqid].append( + [core_name, qseqid, sstart, send, "+", "ERROR"] + ) # (recuento tags para plot) - count_error[sample_name]['total'] += 1 + count_error[sample_name]["total"] += 1 for count_class in count_error[sample_name]: if count_class in allele_quality: - if "no_start_stop" not in count_class and "no_start_stop" in allele_quality: + if ( + "no_start_stop" not in count_class + and "no_start_stop" in allele_quality + ): if count_class == "bad_quality": - count_error[sample_name][count_class] += 1 + count_error[sample_name][ + count_class + ] += 1 else: count_error[sample_name][count_class] += 1 - ## Save results and create reports - if not save_allele_call_results (outputdir, full_gene_list, samples_matrix_dict, exact_dict, paralog_dict, inf_dict, plot_dict, matching_genes_dict, list_asm, list_alm, lnf_tpr_dict, snp_dict, match_alignment_dict, protein_dict, prodigal_report, shorter_seq_coverage, longer_seq_coverage, equal_seq_coverage, shorter_blast_seq_coverage, longer_blast_seq_coverage, equal_blast_seq_coverage, logger): - print('There is an error while saving the allele calling results. Check the log file to get more information \n') - # exit(0) - + if not save_allele_call_results( + outputdir, + full_gene_list, + samples_matrix_dict, + exact_dict, + paralog_dict, + inf_dict, + plot_dict, + matching_genes_dict, + list_asm, + list_alm, + lnf_tpr_dict, + snp_dict, + match_alignment_dict, + protein_dict, + prodigal_report, + shorter_seq_coverage, + longer_seq_coverage, + equal_seq_coverage, + shorter_blast_seq_coverage, + longer_blast_seq_coverage, + equal_blast_seq_coverage, + logger, + ): + print( + "There is an error while saving the allele calling results. 
Check the log file to get more information \n" + ) + # exit(0) ## Saving sample results plots - if not save_allele_calling_plots (outputdir, sample_list_files, count_exact, count_inf, count_asm, count_alm, count_lnf, count_tpr, count_plot, count_niph, count_niphem, count_error, logger): - print('There is an error while saving the allele calling results plots. Check the log file to get more information \n') - + if not save_allele_calling_plots( + outputdir, + sample_list_files, + count_exact, + count_inf, + count_asm, + count_alm, + count_lnf, + count_tpr, + count_plot, + count_niph, + count_niphem, + count_error, + logger, + ): + print( + "There is an error while saving the allele calling results plots. Check the log file to get more information \n" + ) return True, inferred_alleles_dict, inf_dict, exact_dict @@ -2326,8 +4127,9 @@ def allele_call_nucleotides (core_gene_list_files, sample_list_files, alleles_in # Processing gene by gene allele calling # # * * * * * * * * * * * * * * * * * * * # -def processing_allele_calling (arguments) : - ''' + +def processing_allele_calling(arguments): + """ Description: This is the main function for allele calling. With the support of additional functions it will create the output files @@ -2340,93 +4142,145 @@ def processing_allele_calling (arguments) : ???? Return: ???? - ''' + """ start_time = datetime.now() - print('Start the execution at :', start_time ) + print("Start the execution at :", start_time) # Open log file - logger = open_log ('taranis_wgMLST.log') - #print('Checking the pre-requisites.') + logger = open_log("taranis_wgMLST.log") + # print('Checking the pre-requisites.') ############################################################ ## Check additional programs are installed in your system ## ############################################################ - #pre_requisites_list = [['blastp', '2.9'], ['makeblastdb', '2.9']] - #if not check_prerequisites (pre_requisites_list, logger): + # pre_requisites_list = [['blastp', '2.9'], ['makeblastdb', '2.9']] + # if not check_prerequisites (pre_requisites_list, logger): # print ('your system does not fulfill the pre-requistes to run the script ') # exit(0) ###################################################### ## Check that given directories contain fasta files ## ###################################################### - print('Validating schema fasta files in ' , arguments.coregenedir , '\n') + print("Validating schema fasta files in ", arguments.coregenedir, "\n") valid_core_gene_files = get_fasta_file_list(arguments.coregenedir, logger) - if not valid_core_gene_files : - print ('There are not valid fasta files in ', arguments.coregenedir , ' directory. Check log file for more information ') + if not valid_core_gene_files: + print( + "There are not valid fasta files in ", + arguments.coregenedir, + " directory. Check log file for more information ", + ) exit(0) - print('Validating reference alleles fasta files in ' , arguments.refalleles , '\n') + print("Validating reference alleles fasta files in ", arguments.refalleles, "\n") valid_reference_alleles_files = get_fasta_file_list(arguments.refalleles, logger) - if not valid_reference_alleles_files : - print ('There are not valid reference alleles fasta files in ', arguments.refalleles, ' directory. Check log file for more information ') + if not valid_reference_alleles_files: + print( + "There are not valid reference alleles fasta files in ", + arguments.refalleles, + " directory. 
Check log file for more information ", + ) exit(0) - print('Validating sample fasta files in ' , arguments.inputdir , '\n') + print("Validating sample fasta files in ", arguments.inputdir, "\n") valid_sample_files = get_fasta_file_list(arguments.inputdir, logger) - if not valid_sample_files : - print ('There are not valid fasta files in ', arguments.inputdir , ' directory. Check log file for more information ') + if not valid_sample_files: + print( + "There are not valid fasta files in ", + arguments.inputdir, + " directory. Check log file for more information ", + ) exit(0) ################################# ## Prepare the coreMLST schema ## ################################# - tmp_core_gene_dir = os.path.join(arguments.outputdir,'tmp','cgMLST') + tmp_core_gene_dir = os.path.join(arguments.outputdir, "tmp", "cgMLST") try: os.makedirs(tmp_core_gene_dir) except: - logger.info('Deleting the temporary directory for a previous execution without cleaning up') - shutil.rmtree(os.path.join(arguments.outputdir, 'tmp')) + logger.info( + "Deleting the temporary directory for a previous execution without cleaning up" + ) + shutil.rmtree(os.path.join(arguments.outputdir, "tmp")) try: os.makedirs(tmp_core_gene_dir) - logger.info ('Temporary folder %s has been created again', tmp_core_gene_dir) + logger.info( + "Temporary folder %s has been created again", tmp_core_gene_dir + ) except: - logger.info('Unable to create again the temporary directory %s', tmp_core_gene_dir) - print('Cannot create temporary directory on ', tmp_core_gene_dir) + logger.info( + "Unable to create again the temporary directory %s", tmp_core_gene_dir + ) + print("Cannot create temporary directory on ", tmp_core_gene_dir) exit(0) - alleles_in_locus_dict, annotation_core_dict, schema_variability, schema_statistics, schema_quality = prepare_core_gene (valid_core_gene_files, tmp_core_gene_dir, arguments.refalleles, arguments.genus, arguments.species, str(arguments.usegenus).lower(), logger) - #alleles_in_locus_dict, annotation_core_dict, schema_variability, schema_statistics, schema_quality = prepare_core_gene (valid_core_gene_files, tmp_core_gene_dir, arguments.refalleles, arguments.outputdir, logger) + ( + alleles_in_locus_dict, + annotation_core_dict, + schema_variability, + schema_statistics, + schema_quality, + ) = prepare_core_gene( + valid_core_gene_files, + tmp_core_gene_dir, + arguments.refalleles, + arguments.genus, + arguments.species, + str(arguments.usegenus).lower(), + logger, + ) + # alleles_in_locus_dict, annotation_core_dict, schema_variability, schema_statistics, schema_quality = prepare_core_gene (valid_core_gene_files, tmp_core_gene_dir, arguments.refalleles, arguments.outputdir, logger) if not alleles_in_locus_dict: - print('There is an error while processing the schema preparation phase. Check the log file to get more information \n') - logger.info('Deleting the temporary directory to clean up the temporary files created') - shutil.rmtree(os.path.join(arguments.outputdir, 'tmp')) + print( + "There is an error while processing the schema preparation phase. 
Check the log file to get more information \n" + ) + logger.info( + "Deleting the temporary directory to clean up the temporary files created" + ) + shutil.rmtree(os.path.join(arguments.outputdir, "tmp")) exit(0) ############################### ## Prepare the samples files ## ############################### - tmp_samples_dir = os.path.join(arguments.outputdir,'tmp','samples') + tmp_samples_dir = os.path.join(arguments.outputdir, "tmp", "samples") try: os.makedirs(tmp_samples_dir) except: - logger.info('Deleting the temporary directory for a previous execution without properly cleaning up') + logger.info( + "Deleting the temporary directory for a previous execution without properly cleaning up" + ) shutil.rmtree(tmp_samples_dir) try: os.makedirs(tmp_samples_dir) - logger.info('Temporary folder %s has been created again', tmp_samples_dir) + logger.info("Temporary folder %s has been created again", tmp_samples_dir) except: - logger.info('Unable to create again the temporary directory %s', tmp_samples_dir) - shutil.rmtree(os.path.join(arguments.outputdir, 'tmp')) - logger.info('Cleaned up temporary directory ', ) - print('Cannot create temporary directory on ', tmp_samples_dir, 'Check the log file to get more information \n') + logger.info( + "Unable to create again the temporary directory %s", tmp_samples_dir + ) + shutil.rmtree(os.path.join(arguments.outputdir, "tmp")) + logger.info( + "Cleaned up temporary directory ", + ) + print( + "Cannot create temporary directory on ", + tmp_samples_dir, + "Check the log file to get more information \n", + ) exit(0) - contigs_in_sample_dict = prepare_samples(valid_sample_files, tmp_samples_dir, arguments.refgenome, logger) - if not contigs_in_sample_dict : - print('There is an error while processing the saving temporary files. Check the log file to get more information \n') - logger.info('Deleting the temporary directory to clean up the temporary files created') - shutil.rmtree(os.path.join(arguments.outputdir, 'tmp')) + contigs_in_sample_dict = prepare_samples( + valid_sample_files, tmp_samples_dir, arguments.refgenome, logger + ) + if not contigs_in_sample_dict: + print( + "There is an error while processing the saving temporary files. 
Check the log file to get more information \n" + ) + logger.info( + "Deleting the temporary directory to clean up the temporary files created" + ) + shutil.rmtree(os.path.join(arguments.outputdir, "tmp")) exit(0) ################################## @@ -2434,51 +4288,126 @@ def processing_allele_calling (arguments) : ################################## query_directory = arguments.coregenedir reference_alleles_directory = arguments.refalleles - blast_db_directory = os.path.join(tmp_samples_dir,'blastdb') - prodigal_directory = os.path.join(tmp_samples_dir,'prodigal') - blast_results_seq_directory = os.path.join(tmp_samples_dir,'blast_results', 'blast_results_seq') ### path to the directory where sequences found by blast against the reference allele are stored - blast_results_db_directory = os.path.join(tmp_samples_dir,'blast_results', 'blast_results_db') ### path to the directory where the db of sequences found by blast against the reference allele is stored - - complete_allele_call, inferred_alleles_dict, inf_dict, exact_dict = allele_call_nucleotides(valid_core_gene_files, valid_sample_files, alleles_in_locus_dict, contigs_in_sample_dict, query_directory, reference_alleles_directory, blast_db_directory, prodigal_directory, blast_results_seq_directory, blast_results_db_directory, arguments.inputdir, arguments.outputdir, int(arguments.cpus), arguments.percentlength, arguments.coverage, float(arguments.evalue), int(arguments.perc_identity_ref), int(arguments.perc_identity_loc), int(arguments.reward), int(arguments.penalty), int(arguments.gapopen), int(arguments.gapextend), int(arguments.max_target_seqs), int(arguments.max_hsps), int(arguments.num_threads), int(arguments.flankingnts), schema_variability, schema_statistics, schema_quality, annotation_core_dict, arguments.profile, logger) + blast_db_directory = os.path.join(tmp_samples_dir, "blastdb") + prodigal_directory = os.path.join(tmp_samples_dir, "prodigal") + blast_results_seq_directory = os.path.join( + tmp_samples_dir, "blast_results", "blast_results_seq" + ) ### path to the directory where sequences found by blast against the reference allele are stored + blast_results_db_directory = os.path.join( + tmp_samples_dir, "blast_results", "blast_results_db" + ) ### path to the directory where the db of sequences found by blast against the reference allele is stored + + ( + complete_allele_call, + inferred_alleles_dict, + inf_dict, + exact_dict, + ) = allele_call_nucleotides( + valid_core_gene_files, + valid_sample_files, + alleles_in_locus_dict, + contigs_in_sample_dict, + query_directory, + reference_alleles_directory, + blast_db_directory, + prodigal_directory, + blast_results_seq_directory, + blast_results_db_directory, + arguments.inputdir, + arguments.outputdir, + int(arguments.cpus), + arguments.percentlength, + arguments.coverage, + float(arguments.evalue), + int(arguments.perc_identity_ref), + int(arguments.perc_identity_loc), + int(arguments.reward), + int(arguments.penalty), + int(arguments.gapopen), + int(arguments.gapextend), + int(arguments.max_target_seqs), + int(arguments.max_hsps), + int(arguments.num_threads), + int(arguments.flankingnts), + schema_variability, + schema_statistics, + schema_quality, + annotation_core_dict, + arguments.profile, + logger, + ) if not complete_allele_call: - print('There is an error while processing the allele calling. Check the log file to get more information \n') + print( + "There is an error while processing the allele calling. 
Check the log file to get more information \n" + ) exit(0) ######################################################### ## Update core gene schema adding new inferred alleles ## ######################################################### if inferred_alleles_dict: - if str(arguments.updateschema).lower() == 'true' or str(arguments.updateschema).lower() == 'new': - if not update_schema (str(arguments.updateschema).lower(), arguments.coregenedir, arguments.outputdir, valid_core_gene_files, inferred_alleles_dict, alleles_in_locus_dict, logger): - print('There is an error adding new inferred alleles found to the core genes schema. Check the log file to get more information \n') + if ( + str(arguments.updateschema).lower() == "true" + or str(arguments.updateschema).lower() == "new" + ): + if not update_schema( + str(arguments.updateschema).lower(), + arguments.coregenedir, + arguments.outputdir, + valid_core_gene_files, + inferred_alleles_dict, + alleles_in_locus_dict, + logger, + ): + print( + "There is an error adding new inferred alleles found to the core genes schema. Check the log file to get more information \n" + ) exit(0) - if str(arguments.profile).lower() != 'false': - + if str(arguments.profile).lower() != "false": ############################ ## Get ST for each sample ## ############################ - complete_ST, inf_ST = get_ST_profile(arguments.outputdir, arguments.profile, exact_dict, inf_dict, valid_core_gene_files, valid_sample_files, logger) + complete_ST, inf_ST = get_ST_profile( + arguments.outputdir, + arguments.profile, + exact_dict, + inf_dict, + valid_core_gene_files, + valid_sample_files, + logger, + ) if not complete_ST: - print('There is an error while processing ST analysis. Check the log file to get more information \n') + print( + "There is an error while processing ST analysis. Check the log file to get more information \n" + ) exit(0) ########################################### ## Update ST profile file adding new STs ## ########################################### - if str(arguments.updateprofile).lower() == 'true' or str(arguments.updateprofile).lower() == 'new': + if ( + str(arguments.updateprofile).lower() == "true" + or str(arguments.updateprofile).lower() == "new" + ): if len(inf_ST) > 0: - if not update_st_profile (str(arguments.updateprofile).lower(), arguments.profile, arguments.outputdir, inf_ST, valid_core_gene_files, logger): - print('There is an error adding new STs found to the ST profile file. Check the log file to get more information \n') + if not update_st_profile( + str(arguments.updateprofile).lower(), + arguments.profile, + arguments.outputdir, + inf_ST, + valid_core_gene_files, + logger, + ): + print( + "There is an error adding new STs found to the ST profile file. 
Check the log file to get more information \n" + ) exit(0) - shutil.rmtree(os.path.join(arguments.outputdir, 'tmp')) + shutil.rmtree(os.path.join(arguments.outputdir, "tmp")) end_time = datetime.now() - print('completed execution at :', end_time ) + print("completed execution at :", end_time) return True - - - diff --git a/taranis/analyze_schema.py b/taranis/analyze_schema.py index cf218bf..c17fd99 100644 --- a/taranis/analyze_schema.py +++ b/taranis/analyze_schema.py @@ -4,9 +4,12 @@ import rich.console import statistics from pathlib import Path +import Bio.Data.CodonTable from Bio import SeqIO -from Bio.SeqRecord import SeqRecord + +# from Bio.SeqRecord import SeqRecord +from collections import OrderedDict import taranis.utils @@ -43,66 +46,89 @@ def __init__( self.species = species self.usegenus = usegenus - def check_allele_quality(self): - a_quality = {} + def check_allele_quality(self, prokka_annotation): + a_quality = OrderedDict() allele_seq = {} bad_quality_record = [] with open(self.schema_allele) as fh: for record in SeqIO.parse(self.schema_allele, "fasta"): - a_quality[record.id] = {"quality": "Good quality", "reason": "-"} + try: + prokka_ann = prokka_annotation[record.id] + except: + prokka_ann = "Not found in prokka" + a_quality[record.id] = { + "allele_name": self.allele_name, + "quality": "Good quality", + "reason": "-", + "direction": "forward", + "start_codon_alt": "standard", + "protein_seq": "", + "cds_coding": prokka_ann, + } allele_seq[record.id] = str(record.seq) - a_quality[record.id]["length"] = len(str(record.seq)) - if len(record.seq) % 3 != 0: - a_quality[record.id]["quality"] = "Bad quality" - a_quality[record.id]["reason"] = "Can not be converted to protein" - a_quality[record.id]["order"] = "-" - else: - sequence_order = taranis.utils.check_sequence_order(str(record.seq)) - if sequence_order == "Error": - a_quality[record.id]["quality"] = "Bad quality" - a_quality[record.id]["reason"] = "Start or end codon not found" - a_quality[record.id]["order"] = "-" - elif sequence_order == "reverse": - record_sequence = str(record.seq.reverse_complement()) + a_quality[record.id]["length"] = str(len(str(record.seq))) + a_quality[record.id]["dna_seq"] = str(record.seq) + sequence_direction = taranis.utils.get_seq_direction(str(record.seq)) + + if sequence_direction == "reverse": + record.seq = record.seq.reverse_complement() + a_quality[record.id]["direction"] = sequence_direction + elif sequence_direction == "Error": + a_quality[record.id]["direction"] = "-" + try: + a_quality[record.id]["protein_seq"] = str( + record.seq.translate(table=1, cds=True) + ) + + except Bio.Data.CodonTable.TranslationError as e: + if "not a start codon" in str(e): + try: + # Check if sequence has an alternative start codon + # for protein coding + a_quality[record.id]["protein_seq"] = str( + record.seq.translate(table=2, cds=True) + ) + a_quality[record.id]["start_codon_alt"] = "alternative" + except Bio.Data.CodonTable.TranslationError as e_2: + if "stop" in str(e_2): + a_quality[record.id]["reason"] = str(e_2).replace( + "'", "" + ) + else: + a_quality[record.id]["reason"] = str(e).replace("'", "") + a_quality[record.id]["quality"] = "Bad quality" else: - record_sequence = str(record.seq) - a_quality[record.id]["order"] = sequence_order - if record_sequence[0:3] not in taranis.utils.START_CODON_FORWARD: a_quality[record.id]["quality"] = "Bad quality" - a_quality[record.id]["reason"] = "Start codon not found" - elif record_sequence[-3:] not in taranis.utils.STOP_CODON_FORWARD: - 
a_quality[record.id]["quality"] = "Bad quality" - a_quality[record.id]["reason"] = "Stop codon not found" - - elif taranis.utils.find_multiple_stop_codons(record_sequence): - a_quality[record.id]["quality"] = "Bad quality" - a_quality[record.id]["reason"] = "Multiple stop codons found" + a_quality[record.id]["reason"] = str(e).replace("'", "") + if ( self.remove_no_cds and a_quality[record.id]["quality"] == "Bad quality" ): bad_quality_record.append(record.id) - if self.remove_duplicated: - # get the unique sequences and compare the length with all sequences - unique_seq = list(set(list(allele_seq.values()))) - if len(unique_seq) < len(allele_seq): - tmp_dict = {} - for rec_id, seq_value in allele_seq.items(): - if seq_value not in tmp_dict: - tmp_dict[seq_value] = 0 - else: - bad_quality_record.append(rec_id) - a_quality[rec_id]["quality"] ="Bad quality" - a_quality[rec_id]["reason"] ="Duplicate allele" - if self.remove_subset: - unique_seq = list(set(list(allele_seq.values()))) + # check if there are duplicated alleles + # get the unique sequences and compare the length with all sequences + unique_seq = list(set(list(allele_seq.values()))) + if len(unique_seq) < len(allele_seq): + tmp_dict = {} for rec_id, seq_value in allele_seq.items(): - unique_seq.remove(seq_value) - if seq_value in unique_seq: + if seq_value not in tmp_dict: + tmp_dict[seq_value] = 0 + else: + a_quality[rec_id]["quality"] = "Bad quality" + a_quality[rec_id]["reason"] = "Duplicate allele" + if self.remove_duplicated: + bad_quality_record.append(rec_id) + + for rec_id, seq_value in allele_seq.items(): + unique_seq.remove(seq_value) + if seq_value in unique_seq: + a_quality[rec_id]["quality"] = "Bad quality" + a_quality[rec_id]["reason"] = "Sub set allele" + if self.remove_subset: bad_quality_record.append(rec_id) - a_quality[rec_id]["quality"] ="Bad quality" - a_quality[rec_id]["reason"] ="Sub set allele" + new_schema_folder = os.path.join(self.output, "new_schema") _ = taranis.utils.create_new_folder(new_schema_folder) new_schema_file = os.path.join(new_schema_folder, self.allele_name + ".fasta") @@ -116,22 +142,38 @@ def check_allele_quality(self): SeqIO.write(record, fo, "fasta") # update the schema allele with the new file self.schema_allele = new_schema_file + + """ + if self.output_allele_annot: + # dump allele annotation to file + ann_heading = ["gene", "allele", "allele direction","nucleotide sequence", "protein sequence", "nucleotide sequence length", "star codon", "CDS coding", "allele quality", "bad quality reason" ] + ann_fields = ["direction", "dna_seq", "protein_seq", "length", "start_codon_alt","cds_coding", "quality", "reason"] + f_name = os.path.join(self.output, self.allele_name +"_allele_annotation.csv") + with open (f_name, "w") as fo: + fo.write(",".join(ann_heading) + "\n") + for allele in a_quality.keys(): + data_field = [a_quality[allele][field] for field in ann_fields] + fo.write(self.allele_name + "," + allele + "," + ",".join(data_field) + "\n") + """ + return a_quality def fetch_statistics_from_alleles(self, a_quality): - possible_bad_quality = ["Can not be converted to protein", "Start codon not found", "Stop codon not found", "Multiple stop codons found" ,"Duplicate allele", "Sub set allele"] + # POSIBLE_BAD_QUALITY = ["not a start codon", "not a stop codon", "Extra in frame stop codon", "is not a multiple of three", "Duplicate allele", "Sub set allele"] record_data = {} bad_quality_reason = {} a_length = [] bad_quality_counter = 0 for record_id in a_quality.keys(): 
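
The rewritten check_allele_quality leans on Biopython's CDS-aware translation: translate(cds=True) enforces a valid start codon, a single terminal stop codon and a length that is a multiple of three, and raises a TranslationError naming the violated rule, which becomes the recorded bad-quality reason. A minimal sketch of that core idea (simplified: the code above additionally retries with translation table 2 when the only failure is an alternative start codon, and records direction and Prokka annotation):

    import Bio.Data.CodonTable
    from Bio.Seq import Seq

    def classify_allele(dna):
        # translate as a CDS; any violation raises TranslationError
        result = {"quality": "Good quality", "reason": "-", "protein_seq": ""}
        try:
            result["protein_seq"] = str(Seq(dna).translate(table=1, cds=True))
        except Bio.Data.CodonTable.TranslationError as e:
            result["quality"] = "Bad quality"
            result["reason"] = str(e).replace("'", "")
        return result

    classify_allele("ATGAAATAA")  # good quality, protein_seq "MK"
    classify_allele("ATGAAA")     # bad quality: final codon is not a stop codon
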
record_data["allele_name"] = self.allele_name - a_length.append(a_quality[record_id]["length"]) + a_length.append(int(a_quality[record_id]["length"])) if a_quality[record_id]["quality"] == "Bad quality": bad_quality_counter += 1 - bad_quality_reason[a_quality[record_id]["reason"]] = ( - bad_quality_reason.get(a_quality[record_id]["reason"], 0) + 1 - ) + for reason in taranis.utils.POSIBLE_BAD_QUALITY: + if reason in a_quality[record_id]["reason"]: + bad_quality_reason[reason] = ( + bad_quality_reason.get(reason, 0) + 1 + ) total_alleles = len(a_length) record_data["min_length"] = min(a_length) record_data["max_length"] = max(a_length) @@ -140,25 +182,26 @@ def fetch_statistics_from_alleles(self, a_quality): record_data["good_percent"] = round( 100 * (total_alleles - bad_quality_counter) / total_alleles, 2 ) - for item in possible_bad_quality: - record_data[item] = bad_quality_reason[item] if item in bad_quality_reason else 0 - # record_data["bad_quality_reason"] = bad_quality_reason + for item in taranis.utils.POSIBLE_BAD_QUALITY: + record_data[item] = ( + bad_quality_reason[item] if item in bad_quality_reason else 0 + ) + return record_data def analyze_allele_in_schema(self): allele_data = {} - # Perform quality - a_quality = self.check_allele_quality() # run annotations prokka_folder = os.path.join(self.output, "prokka", self.allele_name) anotation_files = taranis.utils.create_annotation_files( self.schema_allele, prokka_folder, self.allele_name ) - allele_data["annotation_gene"] = taranis.utils.read_annotation_file( - anotation_files + ".tsv", self.allele_name - ).get(self.allele_name) - allele_data.update(self.fetch_statistics_from_alleles(a_quality)) - return allele_data + prokka_annotation = taranis.utils.read_annotation_file(anotation_files + ".gff") + + # Perform quality + a_quality = self.check_allele_quality(prokka_annotation) + allele_data = self.fetch_statistics_from_alleles(a_quality) + return [allele_data, a_quality] def parallel_execution( @@ -184,17 +227,30 @@ def parallel_execution( return schema_obj.analyze_allele_in_schema() - -def collect_statistics(stat_data, out_folder): +def collect_statistics(data, out_folder, output_allele_annot): def stats_graphics(stats_folder): - print(out_folder) + allele_range = [0, 300, 600, 1000, 1500] + graphic_folder = os.path.join(stats_folder, "graphics") _ = taranis.utils.create_new_folder(graphic_folder) # create graphic for alleles/number of genes - genes_alleles_df = stats_df["num_alleles"].value_counts().rename_axis("alleles").sort_index().reset_index(name="genes") - _ = taranis.utils.create_graphic(graphic_folder, "num_genes_per_allele.png", "lines", genes_alleles_df["alleles"].to_list(), genes_alleles_df["genes"].to_list(), ["Allele", "number of genes"],"title") + # genes_alleles_df = stats_df["num_alleles"].value_counts().rename_axis("alleles").sort_index().reset_index(name="genes") + group_alleles_df = stats_df.groupby( + pd.cut(stats_df["num_alleles"], allele_range) + ).count() + _ = taranis.utils.create_graphic( + graphic_folder, + "num_genes_per_allele.png", + "bar", + allele_range[1:], + group_alleles_df["num_alleles"].to_list(), + ["Allele", "number of genes"], + "title", + ) + # _ = taranis.utils.create_graphic(graphic_folder, "num_genes_per_allele.png", "lines", genes_alleles_df["alleles"].to_list(), genes_alleles_df["genes"].to_list(), ["Allele", "number of genes"],"title") # create pie graph for good quality - + + """ good_percent = [round(stats_df["good_percent"].mean(),2)] good_percent.append(100 - good_percent[0]) 
labels = ["Good quality", "Bad quality"] @@ -202,25 +258,90 @@ def stats_graphics(stats_folder): _ = taranis.utils.create_graphic(graphic_folder, "quality_of_locus.png", "pie", good_percent, "", labels, "Quality of locus") # create pie graph for bad quality reason. This is optional if there are # bad quality alleles + """ + sum_all_alleles = stats_df["num_alleles"].sum() + labels = [] values = [] for item in taranis.utils.POSIBLE_BAD_QUALITY: labels.append(item) values.append(stats_df[item].sum()) - if sum(values) > 0: - _ = taranis.utils.create_graphic(graphic_folder, "bad_quality_reason.png", "pie", values, "", labels, "Bad quality reason") - # create pie graph for not found gene name + labels.append("Good quality") + values.append(sum_all_alleles - sum(values)) + _ = taranis.utils.create_graphic( + graphic_folder, + "quality_percent.png", + "pie", + values, + "", + labels, + "Quality percent", + ) + # create box plot for allele length variability + _ = taranis.utils.create_graphic( + graphic_folder, + "allele_variability.png", + "box", + "", + stats_df["mean_length"].to_list(), + "", + "Allele variability", + ) + + summary_data = [] + a_quality = [] + for idx in range(len(data)): # pdb.set_trace() - times_not_found_gene = len(stats_df[stats_df["annotation_gene"] == "Not found by Prokka"]) - if times_not_found_gene > 0: - gene_not_found = [times_not_found_gene, len(stat_data)] - labels = ["Not found gene name", "Number of alleles"] - _ = taranis.utils.create_graphic(graphic_folder, "gene_not_found.png", "pie", gene_not_found, "", labels, "Quality of locus") - - stats_df = pd.DataFrame(stat_data) + summary_data.append(data[idx][0]) + a_quality.append(data[idx][1]) + + stats_df = pd.DataFrame(summary_data) + # a_quality = data[1] stats_folder = os.path.join(out_folder, "statistics") _ = taranis.utils.create_new_folder(stats_folder) _ = taranis.utils.write_data_to_file(stats_folder, "statistics.csv", stats_df) + # pdb.set_trace() stats_graphics(stats_folder) - print(stats_df) + if output_allele_annot: + # dump allele annotation to file + ann_heading = [ + "gene", + "allele", + "allele direction", + "nucleotide sequence", + "protein sequence", + "nucleotide sequence length", + "star codon", + "CDS coding", + "allele quality", + "bad quality reason", + ] + ann_fields = [ + "direction", + "dna_seq", + "protein_seq", + "length", + "start_codon_alt", + "cds_coding", + "quality", + "reason", + ] + # f_name = os.path.join(self.output, self.allele_name +"_allele_annotation.csv") + ann_data = ",".join(ann_heading) + "\n" + for gene in a_quality: + for allele in gene.keys(): + data_field = [gene[allele][field] for field in ann_fields] + ann_data += ( + gene[allele]["allele_name"] + + "," + + allele + + "," + + ",".join(data_field) + + "\n" + ) + + _ = taranis.utils.write_data_to_compress_filed( + out_folder, "allele_annotation.csv", ann_data + ) + return diff --git a/taranis/blast.py b/taranis/blast.py index e4b17f0..923b640 100644 --- a/taranis/blast.py +++ b/taranis/blast.py @@ -17,29 +17,57 @@ ) -class Blast(): +class Blast: def __init__(self, db_type): self.db_type = db_type - def create_blastdb (self, file_name, blast_dir): + def create_blastdb(self, file_name, blast_dir): self.f_name = Path(file_name).stem - db_dir = os.path.join(blast_dir,self.f_name) + db_dir = os.path.join(blast_dir, self.f_name) self.out_blast_dir = os.path.join(db_dir, self.f_name) - blast_command = ["makeblastdb" , "-in" , file_name , "-parse_seqids", "-dbtype", self.db_type, "-out" , self.out_blast_dir] + blast_command = 
[ + "makeblastdb", + "-in", + file_name, + "-parse_seqids", + "-dbtype", + self.db_type, + "-out", + self.out_blast_dir, + ] try: - _ = subprocess.run(blast_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True) + _ = subprocess.run( + blast_command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=True, + ) except Exception as e: log.error("Unable to create blast db for %s ", self.f_name) log.error(e) - stderr.print(f"[red] Unable to create blast database for sample %s", self.f_name) + stderr.print( + f"[red] Unable to create blast database for sample %s", self.f_name + ) exit(1) return - - def run_blast(self, query, evalue=0.001, perc_identity=90, reward=1, penalty=-2, gapopen=1, gapextend=1, max_target_seqs=10, max_hsps=10, num_threads=1): + + def run_blast( + self, + query, + evalue=0.001, + perc_identity=90, + reward=1, + penalty=-2, + gapopen=1, + gapextend=1, + max_target_seqs=10, + max_hsps=10, + num_threads=1, + ): """_summary_ blastn -outfmt "6 , qseqid , sseqid , pident , qlen , length , mismatch , gapopen , evalue , bitscore , sstart , send , qstart , qend , sseq , qseq" -query /media/lchapado/Reference_data/proyectos_isciii/taranis/documentos_antiguos/pasteur_schema/lmo0002.fasta -db /media/lchapado/Reference_data/proyectos_isciii/taranis/test/blastdb/RA-L2073_R1/RA-L2073_R1 -evalue 0.001 -penalty -2 -reward 1 -gapopen 1 -gapextend 1 -perc_identity 100 > /media/lchapado/Reference_data/proyectos_isciii/taranis/test/blast_sample_locus002.txt - + Args: query (_type_): _description_ evalue (float, optional): _description_. Defaults to 0.001. @@ -54,14 +82,28 @@ def run_blast(self, query, evalue=0.001, perc_identity=90, reward=1, penalty=-2, """ blast_parameters = '"6 , qseqid , sseqid , pident , qlen , length , mismatch , gapopen , evalue , bitscore , sstart , send , qstart , qend , sseq , qseq"' pdb.set_trace() - #db=self.blast_dir, evalue=evalue, perc_identity=perc_identity_ref, reward=reward, penalty=penalty, gapopen=gapopen, gapextend=gapextend, outfmt=blast_parameters, max_target_seqs=max_target_seqs, max_hsps=max_hsps, num_threads=num_threads, query=core_reference_allele_path) - cline = NcbiblastnCommandline(db=self.out_blast_dir, evalue=evalue, perc_identity=perc_identity, reward=reward, penalty=penalty, gapopen=gapopen, gapextend=gapextend, outfmt=blast_parameters, max_target_seqs=max_target_seqs, max_hsps=max_hsps, num_threads=num_threads, query=query) + # db=self.blast_dir, evalue=evalue, perc_identity=perc_identity_ref, reward=reward, penalty=penalty, gapopen=gapopen, gapextend=gapextend, outfmt=blast_parameters, max_target_seqs=max_target_seqs, max_hsps=max_hsps, num_threads=num_threads, query=core_reference_allele_path) + cline = NcbiblastnCommandline( + db=self.out_blast_dir, + evalue=evalue, + perc_identity=perc_identity, + reward=reward, + penalty=penalty, + gapopen=gapopen, + gapextend=gapextend, + outfmt=blast_parameters, + max_target_seqs=max_target_seqs, + max_hsps=max_hsps, + num_threads=num_threads, + query=query, + ) try: out, _ = cline() except Exception as e: log.error("Unable to run blast for %s ", self.out_blast_dir) log.error(e) - stderr.print(f"[red] Unable to run blast for database %s", self.out_blast_dir) + stderr.print( + f"[red] Unable to run blast for database %s", self.out_blast_dir + ) exit(1) return out.splitlines() - \ No newline at end of file diff --git a/taranis/prediction.py b/taranis/prediction.py index 1706853..da2a395 100644 --- a/taranis/prediction.py +++ b/taranis/prediction.py @@ -15,7 +15,7 @@ ) -class 
Prediction(): +class Prediction: def __init__(self, genome_ref, sample_file, out_dir): self.genome_ref = genome_ref self.sample_file = sample_file @@ -33,33 +33,54 @@ def __init__(self, genome_ref, sample_file, out_dir): except OSError as e: log.error("Cannot create %s directory", self.out_dir) log.error(e) - stderr.print (f"[red] Unable to create {self.out_dir} folder") + stderr.print(f"[red] Unable to create {self.out_dir} folder") exit(1) def training(self): - prodigal_command = ["prodigal" , "-i", self.genome_ref, "-t", self.train] + prodigal_command = ["prodigal", "-i", self.genome_ref, "-t", self.train] try: - _ = subprocess.run(prodigal_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True) + _ = subprocess.run( + prodigal_command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=True, + ) except Exception as e: log.error("Unable to execute prodigal command for training") log.error(e) - stderr.print (f"[red] Unable to run prodigal commmand. ERROR {e} ") + stderr.print(f"[red] Unable to run prodigal command. ERROR {e} ") exit(1) return - - def prediction(self): - - prodigal_command = ["prodigal" , "-i", self.sample_file , "-t", self.train, "-f", "gff", "-o", self.pred_coord, "-a", self.pred_protein, "-d", self.pred_gene] + prodigal_command = [ + "prodigal", + "-i", + self.sample_file, + "-t", + self.train, + "-f", + "gff", + "-o", + self.pred_coord, + "-a", + self.pred_protein, + "-d", + self.pred_gene, + ] try: - _ = subprocess.run(prodigal_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True) + _ = subprocess.run( + prodigal_command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=True, + ) except Exception as e: log.error("Unable to execute prodigal command for prediction") log.error(e) - stderr.print (f"[red] Unable to run prodigal commmand. ERROR {e} ") + stderr.print(f"[red] Unable to run prodigal command. 
ERROR {e} ") exit(1) return def get_sequence(self): - return \ No newline at end of file + return diff --git a/taranis/pruebas.py b/taranis/pruebas.py index 4481b72..0303bf2 100644 --- a/taranis/pruebas.py +++ b/taranis/pruebas.py @@ -25,7 +25,7 @@ locus_list = [] for line in lines: line = line.strip() - if line == "#Cluster 5" : + if line == "#Cluster 5": if alleles_found == False: alleles_found = True continue @@ -37,7 +37,9 @@ # import pdb; pdb.set_trace() rand_locus = random.choice(locus_list) schema_file = "/media/lchapado/Reference_data/proyectos_isciii/taranis/taranis_testing_data/listeria_testing_schema/lmo0002.fasta" -new_schema_file = "/media/lchapado/Reference_data/proyectos_isciii/taranis/test/cluster_lmo0002.fasta" +new_schema_file = ( + "/media/lchapado/Reference_data/proyectos_isciii/taranis/test/cluster_lmo0002.fasta" +) q_file = "/media/lchapado/Reference_data/proyectos_isciii/taranis/test/q_file.fasta" with open(schema_file) as fh: with open(new_schema_file, "w") as fo: @@ -52,15 +54,39 @@ if record.id == rand_locus: SeqIO.write(record, fo, "fasta") break -print ("Selected locus: " , rand_locus) -db_name ="/media/lchapado/Reference_data/proyectos_isciii/taranis/test/testing_clster/lmo0002" -blast_command = ['makeblastdb' , '-in' , new_schema_file , '-parse_seqids', '-dbtype', "nucl", '-out' , db_name] -blast_result = subprocess.run(blast_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) +print("Selected locus: ", rand_locus) +db_name = "/media/lchapado/Reference_data/proyectos_isciii/taranis/test/testing_clster/lmo0002" +blast_command = [ + "makeblastdb", + "-in", + new_schema_file, + "-parse_seqids", + "-dbtype", + "nucl", + "-out", + db_name, +] +blast_result = subprocess.run( + blast_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE +) blast_parameters = '"6 , qseqid , sseqid , pident , qlen , length , mismatch , gapopen , evalue , bitscore , sstart , send , qstart , qend , sseq , qseq"' # pdb.set_trace() -#db=self.blast_dir, evalue=evalue, perc_identity=perc_identity_ref, reward=reward, penalty=penalty, gapopen=gapopen, gapextend=gapextend, outfmt=blast_parameters, max_target_seqs=max_target_seqs, max_hsps=max_hsps, num_threads=num_threads, query=core_reference_allele_path) -cline = NcbiblastnCommandline(db=db_name, evalue=0.001, perc_identity=90, reward=1, penalty=-2, gapopen=1, gapextend=1, outfmt=blast_parameters, max_target_seqs=1100, max_hsps=1000, num_threads=4, query=q_file) +# db=self.blast_dir, evalue=evalue, perc_identity=perc_identity_ref, reward=reward, penalty=penalty, gapopen=gapopen, gapextend=gapextend, outfmt=blast_parameters, max_target_seqs=max_target_seqs, max_hsps=max_hsps, num_threads=num_threads, query=core_reference_allele_path) +cline = NcbiblastnCommandline( + db=db_name, + evalue=0.001, + perc_identity=90, + reward=1, + penalty=-2, + gapopen=1, + gapextend=1, + outfmt=blast_parameters, + max_target_seqs=1100, + max_hsps=1000, + num_threads=4, + query=q_file, +) try: out, _ = cline() @@ -69,4 +95,4 @@ b_lines = out.splitlines() print("longitud del cluster = ", len(locus_list)) print("numero de matches = ", len(b_lines)) -# pdb.set_trace() \ No newline at end of file +# pdb.set_trace() diff --git a/taranis/reference_alleles.py b/taranis/reference_alleles.py index db20a61..13819af 100644 --- a/taranis/reference_alleles.py +++ b/taranis/reference_alleles.py @@ -21,6 +21,7 @@ force_terminal=taranis.utils.rich_force_colors(), ) + class ReferenceAlleles: def __init__(self, fasta_file, output): self.fasta_file = fasta_file @@ -45,7 
+46,7 @@ def check_locus_quality(self): # Check if multiple stop codon by translating to protein and # comparing length locus_prot = Seq(record.seq).translate() - if len(locus_prot) == int(len(seq)/3): + if len(locus_prot) == int(len(seq) / 3): self.locus_quality[record.id] = "good quality" self.selected_locus[record.id] = seq else: @@ -59,7 +60,7 @@ def check_locus_quality(self): # Matched reverse start codon if s_codon_f.group(1) in STOP_CODONS_REVERSE: locus_prot = Seq(record.seq).reverse_complement().translate() - if len(locus_prot) == int(len(record.seq)/3): + if len(locus_prot) == int(len(record.seq) / 3): self.locus_quality[record.id] = "good quality" self.selected_locus[record.id] = seq else: @@ -73,92 +74,119 @@ def check_locus_quality(self): def create_matrix_distance(self): # f_name = os.path.basename(self.fasta_file).split('.')[0] f_name = os.path.basename(self.fasta_file) - mash_folder = os.path.join(self.output, "mash" ) + mash_folder = os.path.join(self.output, "mash") # _ = taranis.utils.write_fasta_file(mash_folder, self.selected_locus, multiple_files=True, extension=False) # save directory to return after mash working_dir = os.getcwd() os.chdir(mash_folder) # run mash sketch command - sketch_file = "reference.msh" + sketch_file = "reference.msh" mash_sketch_command = ["mash", "sketch", "-i", "-o", sketch_file, f_name] # mash sketch -i -o prueba.msh lmo0003.fasta # mash_sketch_command += list(self.selected_locus.keys()) - - mash_sketch_result = subprocess.run(mash_sketch_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + mash_sketch_result = subprocess.run( + mash_sketch_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) # Get pairwise allele sequences mash distances # mash_distance_command = ["mash", "dist", sketch_path, sketch_path] - mash_distance_command = ["mash", "triangle", "-i", "reference.msh"] - mash_distance_result = subprocess.Popen(mash_distance_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + mash_distance_command = ["mash", "triangle", "-i", "reference.msh"] + mash_distance_result = subprocess.Popen( + mash_distance_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) # pdb.set_trace() out, err = mash_distance_result.communicate() - with open ("matrix_distance.tsv", "w") as fo: + with open("matrix_distance.tsv", "w") as fo: # adding alleles to create a heading # the value are not required to be in order, just only any name and the right length - fo.write( "alleles\t" + "\t".join(list(self.selected_locus.keys())) + "\n") + fo.write("alleles\t" + "\t".join(list(self.selected_locus.keys())) + "\n") fo.write(out.decode("UTF-8")) import pandas as pd + locus_num = len(self.selected_locus) # pdb.set_trace() matrix_df = pd.read_csv("matrix_distance.tsv", sep="\t").fillna(value=0) # remove the first line of the matrix that contain only the number of alleles matrix_df = matrix_df.drop(0) - locus_list = matrix_df.iloc[0:locus_num,0] - matrix_np = matrix_df.iloc[:,1:].to_numpy() + locus_list = matrix_df.iloc[0:locus_num, 0] + matrix_np = matrix_df.iloc[:, 1:].to_numpy() # convert the triangular matrix to mirror up triangular part t_matrix_np = matrix_np.transpose() - matrix_np = t_matrix_np + matrix_np + matrix_np = t_matrix_np + matrix_np # values_np = matrix_df.iloc[:,2].to_numpy() - + # matrix_np = values_np.reshape(locus_num, locus_num) # out = out.decode('UTF-8').split('\n') from sklearn.cluster import AgglomerativeClustering - clusterer = AgglomerativeClustering(n_clusters=7, metric="precomputed", linkage="average", 
distance_threshold=None) + + clusterer = AgglomerativeClustering( + n_clusters=7, + metric="precomputed", + linkage="average", + distance_threshold=None, + ) clusters = clusterer.fit_predict(matrix_np) # clustering = AgglomerativeClustering(affinity="precomputed").fit(matrix_np) - mean_distance =np.mean(matrix_np, 0) + mean_distance = np.mean(matrix_np, 0) std = np.std(matrix_np) min_mean = min(mean_distance) mean_all_alleles = np.mean(mean_distance) - max_mean= max(mean_distance) + max_mean = max(mean_distance) # find the index with the minimum mean distance - min_mean_idx= np.where(mean_distance==float(min_mean))[0][0] + min_mean_idx = np.where(mean_distance == float(min_mean))[0][0] # create fasta file with the allele min_allele = self.selected_locus[locus_list[min_mean_idx]] record_allele_folder = os.path.join(os.getcwd(), f_name.split(".")[0]) - min_allele_file = taranis.utils.write_fasta_file(record_allele_folder,min_allele, locus_list[min_mean_idx]) + min_allele_file = taranis.utils.write_fasta_file( + record_allele_folder, min_allele, locus_list[min_mean_idx] + ) # pdb.set_trace() # find the index whose value is closest to the overall mean - mean_all_closser_value = taranis.utils.find_nearest_numpy_value(mean_distance, mean_all_alleles) - mean_all_alleles_idx= np.where(mean_distance==float(mean_all_closser_value))[0][0] + mean_all_closser_value = taranis.utils.find_nearest_numpy_value( + mean_distance, mean_all_alleles + ) + mean_all_alleles_idx = np.where(mean_distance == float(mean_all_closser_value))[ + 0 + ][0] # create fasta file with the allele mean_allele = self.selected_locus[locus_list[mean_all_alleles_idx]] # record_allele_folder = os.path.join(mash_folder, f_name) - mean_allele_file = taranis.utils.write_fasta_file(record_allele_folder,mean_allele, locus_list[mean_all_alleles_idx]) - + mean_allele_file = taranis.utils.write_fasta_file( + record_allele_folder, mean_allele, locus_list[mean_all_alleles_idx] + ) + # find the index with the largest mean distance - max_mean_idx= np.where(mean_distance==float(max_mean))[0][0] + max_mean_idx = np.where(mean_distance == float(max_mean))[0][0] # create fasta file with the allele max_allele = self.selected_locus[locus_list[max_mean_idx]] - max_allele_file = taranis.utils.write_fasta_file(record_allele_folder,max_allele, locus_list[max_mean_idx]) - + max_allele_file = taranis.utils.write_fasta_file( + record_allele_folder, max_allele, locus_list[max_mean_idx] + ) # Pick an outlier, lmo0002_185, to look at the distance outlier_allele = self.selected_locus[locus_list[184]] - outlier_allele_file = taranis.utils.write_fasta_file(record_allele_folder,outlier_allele, locus_list[184]) + outlier_allele_file = taranis.utils.write_fasta_file( + record_allele_folder, outlier_allele, locus_list[184] + ) - # pick a second outlier, lmo0002_95, which belongs to cluster 1 + # pick a second outlier, lmo0002_95, which belongs to cluster 1 outlier2_allele = self.selected_locus[locus_list[95]] - outlier2_allele_file = taranis.utils.write_fasta_file(record_allele_folder,outlier2_allele, locus_list[95]) - - # pick a third outlier, lmo0002_185, which belongs to cluster 4 + outlier2_allele_file = taranis.utils.write_fasta_file( + record_allele_folder, outlier2_allele, locus_list[95] + ) + + # pick a third outlier, lmo0002_185, which belongs to cluster 4 + outlier3_allele = self.selected_locus[locus_list[185]] - outlier3_allele_file = 
taranis.utils.write_fasta_file( + record_allele_folder, outlier3_allele, locus_list[185] + ) # get a list of how many times each value is repeated np.bincount(clusters) blast_parameters = '"6 , qseqid , sseqid , pident , qlen , length , mismatch , gapopen , evalue , bitscore , sstart , send , qstart , qend , sseq , qseq"' from Bio.Blast.Applications import NcbiblastnCommandline + # Create local BLAST database for all alleles in the locus db_name = "/media/lchapado/Reference_data/proyectos_isciii/taranis/new_taranis_result_code/blast/locus_db" # db_name = os.path.join("blast", 'locus_blastdb') @@ -169,16 +197,29 @@ def create_matrix_distance(self): # taranis.utils.create_blastdb(fasta_file, db_name, 'nucl', logger): # locus_db_name = os.path.join(db_name, f_name[0], f_name[0]) # query_data= self.selected_locus["lmo0002_1"] - # All alleles in locus VS reference allele chosen (centroid) BLAST - + # All alleles in locus VS reference allele chosen (centroid) BLAST + # ref_query_file="/media/lchapado/Reference_data/proyectos_isciii/taranis/new_taranis_result_code/mash/lmo0002/query.fasta" # cline = NcbiblastnCommandline(db=db_name, evalue=0.001, perc_identity=100, reward=1, penalty=-2, gapopen=1, gapextend=1, outfmt=blast_parameters, max_target_seqs=0, max_hsps=0, num_threads=4, query=ref_query_file) - # minimum distance + # minimum distance # min_dist_file="/media/lchapado/Reference_data/proyectos_isciii/taranis/new_taranis_result_code/mash/lmo0002/lmo0002_610" # pdb.set_trace() min_dist_file = os.path.join(record_allele_folder, min_allele_file) - cline = NcbiblastnCommandline(db=db_name, evalue=0.001, perc_identity=90, reward=1, penalty=-2, gapopen=1, gapextend=1, outfmt=blast_parameters, max_target_seqs=1100, max_hsps=1000, num_threads=4, query=min_dist_file) + cline = NcbiblastnCommandline( + db=db_name, + evalue=0.001, + perc_identity=90, + reward=1, + penalty=-2, + gapopen=1, + gapextend=1, + outfmt=blast_parameters, + max_target_seqs=1100, + max_hsps=1000, + num_threads=4, + query=min_dist_file, + ) out, err = cline() min_dist_lines = out.splitlines() min_dist_alleles = [] @@ -187,11 +228,24 @@ def create_matrix_distance(self): min_np = np.array(min_dist_alleles) # pdb.set_trace() print("matches with min distance: ", len(min_dist_lines)) - print("Not coverage using as reference" , np.setdiff1d(locus_list, min_np)) + print("Not coverage using as reference", np.setdiff1d(locus_list, min_np)) # mean distance. 
Yields 133 matches # mean_dist_file="/media/lchapado/Reference_data/proyectos_isciii/taranis/new_taranis_result_code/mash/lmo0002/lmo0002_870" - mean_dist_file = os.path.join(record_allele_folder, mean_allele_file) - cline = NcbiblastnCommandline(db=db_name, evalue=0.001, perc_identity=90, reward=1, penalty=-2, gapopen=1, gapextend=1, outfmt=blast_parameters, max_target_seqs=1100, max_hsps=1000, num_threads=4, query=mean_dist_file) + mean_dist_file = os.path.join(record_allele_folder, mean_allele_file) + cline = NcbiblastnCommandline( + db=db_name, + evalue=0.001, + perc_identity=90, + reward=1, + penalty=-2, + gapopen=1, + gapextend=1, + outfmt=blast_parameters, + max_target_seqs=1100, + max_hsps=1000, + num_threads=4, + query=mean_dist_file, + ) out, err = cline() mean_dist_lines = out.splitlines() mean_dist_alleles = [] @@ -199,12 +253,25 @@ mean_dist_alleles.append(mean_dist.split("\t")[1]) mean_np = np.array(mean_dist_alleles) print("matches with mean distance: ", len(mean_dist_lines)) - print("Not coverage using as reference" , np.setdiff1d(locus_list, mean_np)) - - # maximum distance, + print("Not coverage using as reference", np.setdiff1d(locus_list, mean_np)) + + # maximum distance, # ref_query_file="/media/lchapado/Reference_data/proyectos_isciii/taranis/new_taranis_result_code/mash/lmo0002/lmo0002_216" - max_dist_file = os.path.join(record_allele_folder, max_allele_file) - cline = NcbiblastnCommandline(db=db_name, evalue=0.001, perc_identity=90, reward=1, penalty=-2, gapopen=1, gapextend=1, outfmt=blast_parameters, max_target_seqs=1100, max_hsps=1000, num_threads=4, query=max_dist_file) + max_dist_file = os.path.join(record_allele_folder, max_allele_file) + cline = NcbiblastnCommandline( + db=db_name, + evalue=0.001, + perc_identity=90, + reward=1, + penalty=-2, + gapopen=1, + gapextend=1, + outfmt=blast_parameters, + max_target_seqs=1100, + max_hsps=1000, + num_threads=4, + query=max_dist_file, + ) out, err = cline() max_dist_lines = out.splitlines() max_dist_alleles = [] @@ -212,12 +279,25 @@ max_dist_alleles.append(max_dist.split("\t")[1]) max_np = np.array(max_dist_alleles) print("matches with max distance: ", len(max_dist_lines)) - print("Not coverage using as reference" , np.setdiff1d(locus_list, max_np)) - - # choosing one of the outliers, + print("Not coverage using as reference", np.setdiff1d(locus_list, max_np)) + + # choosing one of the outliers, # outlier_file="/media/lchapado/Reference_data/proyectos_isciii/taranis/new_taranis_result_code/mash/lmo0002/lmo0002_183" - outlier_file = os.path.join(record_allele_folder, outlier_allele_file) - cline = NcbiblastnCommandline(db=db_name, evalue=0.001, perc_identity=90, reward=1, penalty=-2, gapopen=1, gapextend=1, outfmt=blast_parameters, max_target_seqs=1100, max_hsps=1000, num_threads=4, query=outlier_file) + outlier_file = os.path.join(record_allele_folder, outlier_allele_file) + cline = NcbiblastnCommandline( + db=db_name, + evalue=0.001, + perc_identity=90, + reward=1, + penalty=-2, + gapopen=1, + gapextend=1, + outfmt=blast_parameters, + max_target_seqs=1100, + max_hsps=1000, + num_threads=4, + query=outlier_file, + ) out, err = cline() outlier_lines = out.splitlines() outlier_alleles = [] @@ -226,14 +306,27 @@ outlier_np = np.array(outlier_alleles) print("matches with outlier distance: ", len(outlier_lines)) - print("Alleles added using outlier as reference" , outlier_np) + print("Alleles added using outlier as 
reference", outlier_np) new_ref_np = np.unique(np.concatenate((min_np, outlier_np), axis=0)) print("\n", "remaining alleles ", np.setdiff1d(locus_list, new_ref_np)) - # choosing the second of the outliers, + # choosing the second of the outliers, # outlier_file="/media/lchapado/Reference_data/proyectos_isciii/taranis/new_taranis_result_code/mash/lmo0002/lmo0002_183" - outlier2_file = os.path.join(record_allele_folder, outlier2_allele_file) - cline = NcbiblastnCommandline(db=db_name, evalue=0.001, perc_identity=90, reward=1, penalty=-2, gapopen=1, gapextend=1, outfmt=blast_parameters, max_target_seqs=1100, max_hsps=1000, num_threads=4, query=outlier2_file) + outlier2_file = os.path.join(record_allele_folder, outlier2_allele_file) + cline = NcbiblastnCommandline( + db=db_name, + evalue=0.001, + perc_identity=90, + reward=1, + penalty=-2, + gapopen=1, + gapextend=1, + outfmt=blast_parameters, + max_target_seqs=1100, + max_hsps=1000, + num_threads=4, + query=outlier2_file, + ) out, err = cline() outlier2_lines = out.splitlines() outlier2_alleles = [] @@ -243,12 +336,29 @@ print("matches with second outlier distance: ", len(outlier2_lines)) # print("Alleles added using second outlier as reference" , outlier2_np) upd_new_ref_np = np.unique(np.concatenate((new_ref_np, outlier2_np), axis=0)) - print("\n", "remaining alleles after second outlier", np.setdiff1d(locus_list, upd_new_ref_np)) + print( + "\n", + "remaining alleles after second outlier", + np.setdiff1d(locus_list, upd_new_ref_np), + ) - # choosing the third of the outliers, + # choosing the third of the outliers, # outlier_file="/media/lchapado/Reference_data/proyectos_isciii/taranis/new_taranis_result_code/mash/lmo0002/lmo0002_183" - outlier3_file = os.path.join(record_allele_folder, outlier3_allele_file) - cline = NcbiblastnCommandline(db=db_name, evalue=0.001, perc_identity=90, reward=1, penalty=-2, gapopen=1, gapextend=1, outfmt=blast_parameters, max_target_seqs=1100, max_hsps=1000, num_threads=4, query=outlier3_file) + outlier3_file = os.path.join(record_allele_folder, outlier3_allele_file) + cline = NcbiblastnCommandline( + db=db_name, + evalue=0.001, + perc_identity=90, + reward=1, + penalty=-2, + gapopen=1, + gapextend=1, + outfmt=blast_parameters, + max_target_seqs=1100, + max_hsps=1000, + num_threads=4, + query=outlier3_file, + ) out, err = cline() outlier3_lines = out.splitlines() outlier3_alleles = [] @@ -257,11 +367,16 @@ outlier3_np = np.array(outlier3_alleles) print("matches with third outlier distance: ", len(outlier3_lines)) # print("Alleles added using second outlier as reference" , outlier2_np) - upd2_new_ref_np = np.unique(np.concatenate((upd_new_ref_np, outlier3_np), axis=0)) - print("\n", "remaining alleles after second outlier", np.setdiff1d(locus_list, upd2_new_ref_np)) - - print("\n Still missing " ,len( np.setdiff1d(locus_list, upd2_new_ref_np))) + upd2_new_ref_np = np.unique( + np.concatenate((upd_new_ref_np, outlier3_np), axis=0) + ) + print( + "\n", + "remaining alleles after third outlier", + np.setdiff1d(locus_list, upd2_new_ref_np), + ) + print("\n Still missing ", len(np.setdiff1d(locus_list, upd2_new_ref_np))) pdb.set_trace() @@ -270,13 +385,11 @@ def create_matrix_distance(self): # X = np.array([[0, 2, 3], [2, 0, 3], [3, 3, 0]]) # clustering = AgglomerativeClustering(affinity="precomputed").fit(X) - def create_ref_alleles(self): self.records = taranis.utils.read_fasta_file(self.fasta_file) _ = self.check_locus_quality() # 
pdb.set_trace() # Prepare data to use mash to create the distance matrix _ = self.create_matrix_distance() - - pass \ No newline at end of file + pass diff --git a/taranis/utils.py b/taranis/utils.py index f41f297..bd1b09a 100644 --- a/taranis/utils.py +++ b/taranis/utils.py @@ -4,16 +4,17 @@ """ import glob - +import io import logging import numpy as np import questionary import os import plotly.graph_objects as go +import re import rich.console import shutil import subprocess - +import tarfile import sys @@ -22,8 +23,10 @@ from Bio.SeqRecord import SeqRecord import pdb + log = logging.getLogger(__name__) + def rich_force_colors(): """ Check if any environment variables are set to force Rich to use coloured output @@ -35,6 +38,8 @@ def rich_force_colors(): ): return True return None + + stderr = rich.console.Console( stderr=True, style="dim", @@ -42,39 +47,73 @@ def rich_force_colors(): force_terminal=rich_force_colors(), ) -START_CODON_FORWARD= ['ATG','ATA','ATT','GTG', 'TTG'] -start_codon_reverse= ['CAT', 'TAT','AAT','CAC','CAA'] +START_CODON_FORWARD = ["ATG", "ATA", "ATT", "GTG", "TTG"] +START_CODON_REVERSE = ["CAT", "TAT", "AAT", "CAC", "CAA"] + +STOP_CODON_FORWARD = ["TAA", "TAG", "TGA"] +STOP_CODON_REVERSE = ["TTA", "CTA", "TCA"] -STOP_CODON_FORWARD = ['TAA', 'TAG','TGA'] -stop_codon_reverse = ['TTA', 'CTA','TCA'] +POSIBLE_BAD_QUALITY = [ + "not a start codon", + "not a stop codon", + "Extra in frame stop codon", + "is not a multiple of three", + "Duplicate allele", + "Sub set allele", +] -POSIBLE_BAD_QUALITY = ["Can not be converted to protein", "Start codon not found", "Stop codon not found", "Multiple stop codons found" ,"Duplicate allele", "Sub set allele"] -def check_sequence_order(allele_sequence): +def get_seq_direction(allele_sequence): # check direction - if allele_sequence[0:3] in START_CODON_FORWARD or allele_sequence[-3:] in STOP_CODON_FORWARD: - return 'forward' - if allele_sequence[-3:] in start_codon_reverse or allele_sequence[0:3] in stop_codon_reverse: - return 'reverse' + if ( + allele_sequence[0:3] in START_CODON_FORWARD + or allele_sequence[-3:] in STOP_CODON_FORWARD + ): + return "forward" + if ( + allele_sequence[-3:] in START_CODON_REVERSE + or allele_sequence[0:3] in STOP_CODON_REVERSE + ): + return "reverse" return "Error" -def create_annotation_files(fasta_file, annotation_dir, prefix, genus="Genus", species="species", usegenus=False): + +def create_annotation_files( + fasta_file, + annotation_dir, + prefix, + genus="Genus", + species="species", + usegenus=False, + cpus=3, +): try: - _ = subprocess.run (['prokka', fasta_file, '--force', '--outdir', annotation_dir, '--genus', genus, '--species', species, '--usegenus', str(usegenus), '--gcode', '11', '--prefix', prefix, '--quiet']) + _ = subprocess.run( + [ + "prokka", + fasta_file, + "--force", + "--outdir", + annotation_dir, + "--genus", + genus, + "--species", + species, + "--usegenus", + str(usegenus), + "--gcode", + "11", + "--prefix", + prefix, + "--cpus", + str(cpus), + "--quiet", + ] + ) except Exception as e: - log.error("Unable to run prokka. Error message: %s ", e ) + log.error("Unable to run prokka. Error message: %s ", e) stderr.print("[red] Unable to run prokka. 
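
Note that get_seq_direction classifies orientation purely from the terminal codons, so it is cheap but heuristic: a sequence that merely happens to end in TAA/TAG/TGA is still called forward. Illustrative calls, assuming the constants defined above:

    get_seq_direction("ATGAAATAA")  # "forward": starts with the start codon ATG
    get_seq_direction("TTATTTCAT")  # "reverse": reverse complement of the above
    get_seq_direction("CCCCCCCCC")  # "Error": no recognisable start/stop codon
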
Given error; " + e) sys.exit(1) - # Check that prokka store files in the requested folder - # if prokka results are not found in the requested folder then move from the - # running directory to the right one - if not folder_exists(annotation_dir): - try: - shutil.move(prefix, annotation_dir) - except Exception as e: - log.error("Unable to move prokka result folder to %s ", e ) - stderr.print("[red] Unable to move result prokka folder. Error; " + e) - sys.exit(1) return os.path.join(annotation_dir, prefix) @@ -88,14 +127,19 @@ def create_new_folder(folder_name): return -def create_graphic(out_folder, f_name, mode, x_data, y_data, labels, title ): +def create_graphic(out_folder, f_name, mode, x_data, y_data, labels, title): fig = go.Figure() # pdb.set_trace() if mode == "lines": fig.add_trace(go.Scatter(x=x_data, y=y_data, mode=mode, name=title)) elif mode == "pie": fig.add_trace(go.Pie(labels=labels, values=x_data)) - fig.update_layout(title_text= title) + elif mode == "bar": + fig.add_trace(go.Bar(x=x_data, y=y_data)) + elif mode == "box": + fig.add_trace(go.Box(y=y_data)) + + fig.update_layout(title_text=title) fig.write_image(os.path.join(out_folder, f_name)) @@ -106,7 +150,7 @@ def get_files_in_folder(folder, extension=None): Args: folder (string): folder path extension (string, optional): extension for filtering. Defaults to None. - + Returns: list: list of files which match the condition """ @@ -116,14 +160,16 @@ def get_files_in_folder(folder, extension=None): sys.exit(1) if extension is None: extension = "*" - folder_files = os.path.join(folder , "*." + extension) + folder_files = os.path.join(folder, "*." + extension) files_in_folder = glob.glob(folder_files) if len(files_in_folder) == 0: - log.error("Folder %s does not have any file which the extension %s", folder, extension) + log.error( + "Folder %s does not have any file which the extension %s", folder, extension + ) stderr.print("[red] Folder does not have any file which match your request") sys.exit(1) return files_in_folder - + def file_exists(file_to_check): """Checks if input file exists @@ -133,28 +179,18 @@ def file_exists(file_to_check): Returns: boolean: True if exists - """ + """ if os.path.isfile(file_to_check): return True return False -def find_multiple_stop_codons(seq) : - stop_codons = ['TAA', 'TAG','TGA'] - c_index = [] - for idx in range (0, len(seq) -2, 3) : - c_seq = seq[idx : idx + 3] - if c_seq in stop_codons : - c_index.append(idx) - if len(c_index) == 1: - return False - return True - def find_nearest_numpy_value(array, value): array = np.asarray(array) idx = (np.abs(array - value)).argmin() return array[idx] + def folder_exists(folder_to_check): """Checks if input folder exists @@ -163,15 +199,17 @@ def folder_exists(folder_to_check): Returns: boolean: True if exists - """ + """ if os.path.isdir(folder_to_check): return True return False + def prompt_text(msg): source = questionary.text(msg).unsafe_ask() return source + def query_user_yes_no(question, default): """Query the user to choose yes or no for the query question @@ -180,8 +218,8 @@ def query_user_yes_no(question, default): default (string): default option to be used: yes or no Returns: - user select: True continue with code - """ + user select: True continue with code + """ valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False} if default is None: prompt = " [y/n] " @@ -204,36 +242,37 @@ def query_user_yes_no(question, default): else: sys.stdout.write("Please respond with 'yes' or 'no' (or 'y' or 'n').\n") -def 
@@ -204,36 +242,37 @@
     else:
         sys.stdout.write("Please respond with 'yes' or 'no' (or 'y' or 'n').\n")
 
-def read_annotation_file(ann_file, allele_name, only_first_line=True):
-    """ example of annotation file
-    locus_tag	ftype	length_bp	gene	EC_number	COG	product
-    IEKBEMEO_00001	CDS	1344	yeeO_1		COG0534	putative FMN/FAD exporter YeeO
-    IEKBEMEO_00002	CDS	1344	yeeO_2		COG0534	putative FMN/FAD exporter YeeO
+
+def read_annotation_file(ann_file):
+    """example of annotation file
+
+    lmo0002_782	Prodigal:002006	CDS	1	1146	.	+	0	ID=OJGEGONH_00782;Name=dnaN_782;db_xref=COG:COG0592;gene=dnaN_782;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P05649;locus_tag=OJGEGONH_00782;product=Beta sliding clamp
+    lmo0002_783	Prodigal:002006	CDS	1	1146	.	+	0	ID=OJGEGONH_00783;Name=dnaN_783;db_xref=COG:COG0592;gene=dnaN_783;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P05649;locus_tag=OJGEGONH_00783;product=Beta sliding clamp
+    lmo0049_3	Prodigal:002006	CDS	1	162	.	+	0	ID=CODOCEEL_00001;inference=ab initio prediction:Prodigal:002006;locus_tag=CODOCEEL_00001;product=hypothetical protein
+    lmo0049_6	Prodigal:002006	CDS	1	162	.	+	0	ID=CODOCEEL_00002;inference=ab initio prediction:Prodigal:002006;locus_tag=CODOCEEL_00002;product=hypothetical protein
     """
     ann_data = {}
-    with open (ann_file, "r") as fh:
+    with open(ann_file, "r") as fh:
         lines = fh.readlines()
-    heading = lines[0].split("\t")
-    locus_tag_idx = heading.index("locus_tag")
-    gene_idx = heading.index("gene")
-    if only_first_line:
-        first_line = lines[1].split("\t")
-        ann_data[allele_name] = first_line[gene_idx] if first_line[gene_idx] != "" else "Not found by Prokka"
-    else:
-        # Return all annotation lines
-        for line in lines[1:]:
-            s_line = line.strip().split("\t")
-            allele_key = allele_name + "_" + s_line[locus_tag_idx].split("_")[1]
-            ann_data[allele_key] = s_line[gene_idx] if s_line[gene_idx] != "" else "Not found by Prokka"
-    return ann_data
+    for line in lines:
+        if "Prodigal" in line:
+            gene_match = re.search(r"(.*)[\t]Prodigal.*gene=(\w+)_.*", line)
+            if gene_match:
+                ann_data[gene_match.group(1)] = gene_match.group(2)
+            else:
+                pred_match = re.search(r"(.*)[\t]Prodigal.*product=(\w+)_.*", line)
+                if pred_match:
+                    ann_data[pred_match.group(1)] = pred_match.group(2).strip()
+        if "fasta" in line:
+            break
+    return ann_data
 
 def read_fasta_file(fasta_file):
     return SeqIO.parse(fasta_file, "fasta")
-    
+
 
 def write_fasta_file(out_folder, seq_data, allele_name=None, f_name=None):
     try:
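The rewritten read_annotation_file drops the old Prokka .tsv layout (and its allele_name and only_first_line parameters): it now scans the annotation file for Prodigal CDS lines, capturing the gene name, or failing that the product, with a regex, and stops once it hits the trailing fasta section. A sketch of what the first pattern captures, using a tab-separated line adapted (and shortened) from the docstring above:

import re

line = (
    "lmo0002_782\tProdigal:002006\tCDS\t1\t1146\t.\t+\t0\t"
    "ID=OJGEGONH_00782;Name=dnaN_782;gene=dnaN_782;"
    "locus_tag=OJGEGONH_00782;product=Beta sliding clamp"
)
gene_match = re.search(r"(.*)[\t]Prodigal.*gene=(\w+)_.*", line)
if gene_match:
    # group(1): allele id before the tab; group(2): gene name without its numeric suffix
    print(gene_match.group(1), "->", gene_match.group(2))  # lmo0002_782 -> dnaN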
@@ -246,22 +285,55 @@
             # use the fasta name as file name
             f_name = key + ".fasta"
             f_path_name = os.path.join(out_folder, f_name)
-            with open (f_path_name, "w") as fo:
+            with open(f_path_name, "w") as fo:
                 fo.write(">" + key + "\n")
                 fo.write(seq)
     else:
         if f_name is None:
             f_name = allele_name
         f_path_name = os.path.join(out_folder, f_name)
-        with open (f_path_name, "w") as fo:
+        with open(f_path_name, "w") as fo:
             fo.write(">" + allele_name + "\n")
             fo.write(seq_data)
     return f_name
 
-def write_data_to_file(out_folder, f_name, data, include_header=True, data_type="pandas", extension="csv"):
-    f_path_name = os.path.join(out_folder,f_name)
+
+def write_data_to_compress_filed(out_folder, f_name, dump_data):
+    with io.BytesIO() as buffer:
+        with tarfile.open(fileobj=buffer, mode="w:gz") as tar:
+            # Add data to the tar archive
+            tarinfo = tarfile.TarInfo(f_name)
+            # Write the dumped string as the single member of the archive
+            data_bytes = dump_data.encode("utf-8")
+            tarinfo.size = len(data_bytes)
+            tar.addfile(tarinfo, io.BytesIO(data_bytes))
+
+        # Get the content of the in-memory tar.gz file
+        buffer.seek(0)
+        tar_data = buffer.read()
+    file_path_name = os.path.join(out_folder, Path(f_name).stem + ".tar.gz")
+    with open(file_path_name, "wb") as fo:
+        fo.write(tar_data)
+
+
+def write_data_to_file(
+    out_folder, f_name, data, include_header=True, data_type="pandas", extension="csv"
+):
+    f_path_name = os.path.join(out_folder, f_name)
     if data_type == "pandas":
-        data.to_csv(f_path_name, sep=",",header=include_header)
+        data.to_csv(f_path_name, sep=",", header=include_header)
     return
+
+
+"""
+def find_multiple_stop_codons(seq) :
+    stop_codons = ['TAA', 'TAG','TGA']
+    c_index = []
+    for idx in range (0, len(seq) -2, 3) :
+        c_seq = seq[idx : idx + 3]
+        if c_seq in stop_codons :
+            c_index.append(idx)
+    if len(c_index) == 1:
+        return False
+    return True
+"""
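The new write_data_to_compress_filed builds the gzip-compressed tar archive entirely in memory and only writes it to disk at the end, so no temporary member file is created; the archive takes the stem of f_name while the member keeps the full name. A minimal round-trip sketch, assuming this branch of taranis is importable (the file name and CSV payload are invented):

import os
import tarfile
import tempfile

from taranis.utils import write_data_to_compress_filed

with tempfile.TemporaryDirectory() as tmp:
    write_data_to_compress_filed(tmp, "allele_annotation.csv", "locus,gene\nlmo0002,dnaN\n")
    # archive named after Path(f_name).stem; the member inside keeps the .csv name
    with tarfile.open(os.path.join(tmp, "allele_annotation.tar.gz"), "r:gz") as tar:
        print(tar.extractfile("allele_annotation.csv").read().decode("utf-8"))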