From ad1b389fe6efefa4983ec9d674f9ec4f6304d3ef Mon Sep 17 00:00:00 2001
From: Zhemin Zhou <zhemin.zhou@warwick.ac.uk>
Date: Wed, 17 Jun 2020 08:33:04 +0100
Subject: [PATCH] Rename PEPPA to PEPPAN in outputs, help text and symlinks; bump version to 1.0.5

---
 PEPPAN        |  2 +-
 PEPPAN.py     | 26 +++++++++++++-------------
 PEPPAN_parser |  2 +-
 setup.py      |  4 ++--
 4 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/PEPPAN b/PEPPAN
index ebbaca1..a531683 120000
--- a/PEPPAN
+++ b/PEPPAN
@@ -1 +1 @@
-PEPPA.py
\ No newline at end of file
+PEPPAN.py
\ No newline at end of file
diff --git a/PEPPAN.py b/PEPPAN.py
index aa22400..8c82558 100755
--- a/PEPPAN.py
+++ b/PEPPAN.py
@@ -1514,19 +1514,19 @@ def setInFrame(part) :
                     allele_file.write('>{0}_{1}\n{2}\n'.format(gene, id, seq))
 
     #prediction = prediction[prediction.T[0] != '']
-    with open('{0}.PEPPA.gff'.format(prefix), 'w') as fout :
-        fout.write('#!gff-version 3\n#!annotation-source PEPPA from enterobase.warwick.ac.uk\n')
+    with open('{0}.PEPPAN.gff'.format(prefix), 'w') as fout :
+        fout.write('#!gff-version 3\n#!annotation-source PEPPAN from enterobase.warwick.ac.uk\n')
         for pid, pred in enumerate(prediction) :
             if pred[0] == '' : continue
             if pred[15] == 'misc_feature' :
-                fout.write('{0}\t{1}\tPEPPA\t{2}\t{3}\t.\t{4}\t.\tID={5};{7}inference={6}\n'.format(
+                fout.write('{0}\t{1}\tPEPPAN\t{2}\t{3}\t.\t{4}\t.\tID={5};{7}inference={6}\n'.format(
                     pred[5], 'misc_feature', pred[9], pred[10], pred[11],
                     '{0}_g_{1}'.format(prefix.rsplit('/', 1)[-1], pred[2]), pred[16],
                     'old_locus_tag={0}:{1}-{2};'.format(pred[0].split(':', 1)[1], pred[9], pred[10]),
                 ))
             else :
                 if pred[0] in removed :
-                    fout.write('{0}\t{1}\tPEPPA\t{2}\t{3}\t.\t{4}\t.\tID={5};{8}inference=ortholog_group:{6}{7}\n'.format(
+                    fout.write('{0}\t{1}\tPEPPAN\t{2}\t{3}\t.\t{4}\t.\tID={5};{8}inference=ortholog_group:{6}{7}\n'.format(
                         pred[5], 'misc_feature',
                         pred[9], pred[10], pred[11],
                         '{0}_g_{1}'.format(prefix.rsplit('/', 1)[-1], pred[2]), pred[13],
@@ -1535,7 +1535,7 @@ def setInFrame(part) :
                     ))
 
                 elif unreliable.get(pid, 1) == 2 :
-                    fout.write('{0}\t{1}\tPEPPA\t{2}\t{3}\t.\t{4}\t.\tID={5};{8}inference=ortholog_group:{6}{7}\n'.format(
+                    fout.write('{0}\t{1}\tPEPPAN\t{2}\t{3}\t.\t{4}\t.\tID={5};{8}inference=ortholog_group:{6}{7}\n'.format(
                         pred[5], 'pseudogene',
                         pred[9], pred[10], pred[11],
                         '{0}_g_{1}'.format(prefix.rsplit('/', 1)[-1], pred[2]), pred[13],
@@ -1544,7 +1544,7 @@ def setInFrame(part) :
                     ))
 
                 else :
-                    fout.write('{0}\t{1}\tPEPPA\t{2}\t{3}\t.\t{4}\t.\tID={5};{8}inference=ortholog_group:{6}{7}\n'.format(
+                    fout.write('{0}\t{1}\tPEPPAN\t{2}\t{3}\t.\t{4}\t.\tID={5};{8}inference=ortholog_group:{6}{7}\n'.format(
                         pred[5], 'pseudogene' if pred[15].startswith('pseudogen') else pred[15],
                         pred[9], pred[10], pred[11],
                         '{0}_g_{1}'.format(prefix.rsplit('/', 1)[-1], pred[2]), pred[13],
@@ -1553,7 +1553,7 @@ def setInFrame(part) :
                     ))
 
     allele_file.close()
-    logger('Pan genome annotations have been saved in {0}'.format('{0}.PEPPA.gff'.format(prefix)))
+    logger('Pan genome annotations have been saved in {0}'.format('{0}.PEPPAN.gff'.format(prefix)))
     logger('Gene allelic sequences have been saved in {0}'.format('{0}.allele.fna'.format(prefix)))
     return
 
@@ -1639,7 +1639,7 @@ def get_global_difference(geneGroups, cluFile, bsnFile, geneInGenomes, nGene = 1
 def add_args(a) :
     import argparse
     parser = argparse.ArgumentParser(description='''
-PEPPA.py
+PEPPAN
 (1) Retieve genes and genomic sequences from GFF files and FASTA files.
 (2) Group genes into clusters using mmseq.
 (3) Map gene clusters back to genomes. 
@@ -1648,7 +1648,7 @@ def add_args(a) :
 (6) Re-annotate genomes using the remained of orthologs. 
 ''', formatter_class=argparse.RawTextHelpFormatter)
     parser.add_argument('GFFs', metavar='GFF', help='[REQUIRED] GFF files containing both annotations and sequences. \nIf you have sequences and GFF annotations in separate files, \nthey can also be defined as: <GFF>,<fasta>', nargs='*')
-    parser.add_argument('-p', '--prefix', help='[Default: PEPPA] prefix for the outputs. ', default='PEPPA')
+    parser.add_argument('-p', '--prefix', help='[Default: PEPPAN] prefix for the outputs. ', default='PEPPAN')
     parser.add_argument('-g', '--genes', help='[optional] comma delimited filenames of fasta files containing additional genes. ', default='')
     parser.add_argument('-P', '--priority', help='[optional] comma delimited, ordered list of GFFs or gene fasta files that are more reliable than others. \nGenes contained in these files are preferred in all stages.', default='')
     parser.add_argument('-t', '--n_thread', help='[Default: 8] Number of threads to use. Default: 8', default=8, type=int)
@@ -1686,7 +1686,7 @@ def add_args(a) :
     parser.add_argument('--feature', help='feature to extract. Be cautious to change this value. DEFAULT: CDS', default='CDS')
     parser.add_argument('--noncoding', help='Set to noncoding mode. This is still under development. Equals to \n"--nucl --incompleteCDS sife"', default=False, action='store_true')
     parser.add_argument('--metagenome', help='Set to metagenome mode. This is still under development. Equals to \n"--nucl --incompleteCDS sife --clust_identity 0.99 --clust_match_prop 0.8 --match_identity 0.98 --orthology sbh"', default=False, action='store_true')
-    parser.add_argument('--testunit', help='download four E. coli ST131 genomes for testing of PEPPA.', default=False, action='store_true')
+    parser.add_argument('--testunit', help='download four E. coli ST131 genomes for testing of PEPPAN.', default=False, action='store_true')
 
     params = parser.parse_args(a)
     if params.testunit :
@@ -1729,10 +1729,10 @@ def prepare_testunit() :
     urllib.request.urlretrieve('https://github.com/zheminzhou/PEPPA/blob/master/examples/GCF_001577325.combined.gff.gz?raw=true', \
                                'examples/GCF_001577325.combined.gff.gz')
     sys.stderr.write('Folder "examples" has been created with four GFF files downloaded. \nRun:\n')
-    sys.stderr.write('$ PEPPA -p examples/ST131 -P examples/GCF_000010485.combined.gff.gz examples/*.gff.gz\n')
+    sys.stderr.write('$ PEPPAN -p examples/ST131 -P examples/GCF_000010485.combined.gff.gz examples/*.gff.gz\n')
     sys.stderr.write('To test the main program. And then run:\n')
-    sys.stderr.write('$ PEPPA_parser -g examples/ST131.PEPPA.gff -s examples/PEPPA_out -t -c -a 95\n')
-    sys.stderr.write('To test PEPPA_parser\n')
+    sys.stderr.write('$ PEPPAN_parser -g examples/ST131.PEPPAN.gff -s examples/PEPPAN_out -t -c -a 95\n')
+    sys.stderr.write('To test PEPPAN_parser\n')
 
 def encodeNames(genomes, genes, geneFiles, labelFile, reuse=False) :
     taxon = {g[0] for g in genomes.values()}
diff --git a/PEPPAN_parser b/PEPPAN_parser
index f215895..2030421 120000
--- a/PEPPAN_parser
+++ b/PEPPAN_parser
@@ -1 +1 @@
-PEPPA_parser.py
\ No newline at end of file
+PEPPAN_parser.py
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 3452190..5b9c7c1 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
 import os, sys
 from setuptools import setup, find_packages
 #from PEPPA import __VERSION__
-__VERSION__ = '1.0.3'
+__VERSION__ = '1.0.5'
 
 with open('README.rst', encoding='utf-8') as f:
     long_description = f.read()
@@ -19,7 +19,7 @@
     packages = ['PEPPAN'],
     package_dir = {'PEPPAN':'.'},
     keywords=['bioinformatics', 'microbial', 'genomics', 'MLST', 'pan-genome'],
-    install_requires=['ete3>=3.1.1', 'numba>=0.38.0', 'numpy>=1.18.1', 'pandas>=0.24.6', 'scipy>=1.3.2'],
+    install_requires=['ete3>=3.1.1', 'numba>=0.38.0', 'numpy>=1.18.1', 'pandas>=0.24.2', 'scipy>=1.3.2'],
     include_package_data=True,
     entry_points={
         'console_scripts': [