From fa0317035397ee86bc862eba23f013c260082329 Mon Sep 17 00:00:00 2001
From: nservant <nicolas.servant@curie.fr>
Date: Wed, 22 Apr 2020 13:42:12 +0200
Subject: [PATCH 01/15] [MODIF] bump v1.2.0

---
 CHANGELOG    | 4 ++++
 bin/pyTMB.py | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG b/CHANGELOG
index 988cd06..861234a 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,7 @@
+*************************************
+CHANGES IN VERSION 1.2.0
+
+
 *************************************
 CHANGES IN VERSION 1.1.0
 
diff --git a/bin/pyTMB.py b/bin/pyTMB.py
index fff69c7..dd12ec2 100644
--- a/bin/pyTMB.py
+++ b/bin/pyTMB.py
@@ -14,7 +14,7 @@
 #
 ##############################################################################
 
-__version__='1.1.0'
+__version__='1.2.0'
 
 """
 This script is designed to calculate a TMB score from a VCF file.

From d483d5421ca8202e05c4203a310834c63d4f43ef Mon Sep 17 00:00:00 2001
From: nservant <nicolas.servant@curie.fr>
Date: Wed, 22 Apr 2020 14:15:51 +0200
Subject: [PATCH 02/15] [MODIF] typo in README

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 5d5b0be..a92ba4f 100644
--- a/README.md
+++ b/README.md
@@ -180,7 +180,7 @@ The option allows to export a vcf file with the tag **TMB_FILTERS** in the **INF
 
 ## Examples
 
-Let's calculated the TMB on a gene panel vcf file (coding size = 1.9Mb, caller = varscan, annotation = Annovar) as the following variants : 
+Let's calculated the TMB on a gene panel vcf file (coding size = 1.9Mb, caller = varscan, annotation = Annovar) with the following criteria: 
 - PASS
 - minDepth at 100X
 - non-synonymous

From e2a76a928f4474e17b8ecb9ca1b721d348ba8440 Mon Sep 17 00:00:00 2001
From: nservant <nicolas.servant@curie.fr>
Date: Wed, 22 Apr 2020 17:58:18 +0200
Subject: [PATCH 03/15] [ADD] LICENCE

---
 LICENSE | 517 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 517 insertions(+)
 create mode 100644 LICENSE

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..e3dc991
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,517 @@
+CeCILL FREE SOFTWARE LICENSE AGREEMENT
+Copyright (c) Institut Curie
+
+Version 2.1 dated 2013-06-21
+
+    Notice
+
+This Agreement is a Free Software license agreement that is the result
+of discussions between its authors in order to ensure compliance with
+the two main principles guiding its drafting:
+
+  * firstly, compliance with the principles governing the distribution
+    of Free Software: access to source code, broad rights granted to users,
+  * secondly, the election of a governing law, French law, with which it
+    is conformant, both as regards the law of torts and intellectual
+    property law, and the protection that it offers to both authors and
+    holders of the economic rights over software.
+
+The authors of the CeCILL (for Ce[a] C[nrs] I[nria] L[ogiciel] L[ibre]) 
+license are: 
+
+Commissariat a  l'energie atomique et aux energies alternatives - CEA, a
+public scientific, technical and industrial research establishment,
+having its principal place of business at 25 rue Leblanc, immeuble Le
+Ponant D, 75015 Paris, France.
+
+Centre National de la Recherche Scientifique - CNRS, a public scientific
+and technological establishment, having its principal place of business
+at 3 rue Michel-Ange, 75794 Paris cedex 16, France.
+
+Institut National de Recherche en Informatique et en Automatique -
+Inria, a public scientific and technological establishment, having its
+principal place of business at Domaine de Voluceau, Rocquencourt, BP
+105, 78153 Le Chesnay cedex, France.
+
+
+    Preamble
+
+The purpose of this Free Software license agreement is to grant users
+the right to modify and redistribute the software governed by this
+license within the framework of an open source distribution model.
+
+The exercising of this right is conditional upon certain obligations for
+users so as to preserve this status for all subsequent redistributions.
+
+In consideration of access to the source code and the rights to copy,
+modify and redistribute granted by the license, users are provided only
+with a limited warranty and the software's author, the holder of the
+economic rights, and the successive licensors only have limited liability.
+
+In this respect, the risks associated with loading, using, modifying
+and/or developing or reproducing the software by the user are brought to
+the user's attention, given its Free Software status, which may make it
+complicated to use, with the result that its use is reserved for
+developers and experienced professionals having in-depth computer
+knowledge. Users are therefore encouraged to load and test the
+suitability of the software as regards their requirements in conditions
+enabling the security of their systems and/or data to be ensured and,
+more generally, to use and operate it in the same conditions of
+security. This Agreement may be freely reproduced and published,
+provided it is not altered, and that no provisions are either added or
+removed herefrom.
+
+This Agreement may apply to any or all software for which the holder of
+the economic rights decides to submit the use thereof to its provisions.
+
+Frequently asked questions can be found on the official website of the
+CeCILL licenses family (http://www.cecill.info/index.en.html) for any 
+necessary clarification.
+
+
+    Article 1 - DEFINITIONS
+
+For the purpose of this Agreement, when the following expressions
+commence with a capital letter, they shall have the following meaning:
+
+Agreement: means this license agreement, and its possible subsequent
+versions and annexes.
+
+Software: means the software in its Object Code and/or Source Code form
+and, where applicable, its documentation, "as is" when the Licensee
+accepts the Agreement.
+
+Initial Software: means the Software in its Source Code and possibly its
+Object Code form and, where applicable, its documentation, "as is" when
+it is first distributed under the terms and conditions of the Agreement.
+
+Modified Software: means the Software modified by at least one
+Contribution.
+
+Source Code: means all the Software's instructions and program lines to
+which access is required so as to modify the Software.
+
+Object Code: means the binary files originating from the compilation of
+the Source Code.
+
+Holder: means the holder(s) of the economic rights over the Initial
+Software.
+
+Licensee: means the Software user(s) having accepted the Agreement.
+
+Contributor: means a Licensee having made at least one Contribution.
+
+Licensor: means the Holder, or any other individual or legal entity, who
+distributes the Software under the Agreement.
+
+Contribution: means any or all modifications, corrections, translations,
+adaptations and/or new functions integrated into the Software by any or
+all Contributors, as well as any or all Internal Modules.
+
+Module: means a set of sources files including their documentation that
+enables supplementary functions or services in addition to those offered
+by the Software.
+
+External Module: means any or all Modules, not derived from the
+Software, so that this Module and the Software run in separate address
+spaces, with one calling the other when they are run.
+
+Internal Module: means any or all Module, connected to the Software so
+that they both execute in the same address space.
+
+GNU GPL: means the GNU General Public License version 2 or any
+subsequent version, as published by the Free Software Foundation Inc.
+
+GNU Affero GPL: means the GNU Affero General Public License version 3 or
+any subsequent version, as published by the Free Software Foundation Inc.
+
+EUPL: means the European Union Public License version 1.1 or any
+subsequent version, as published by the European Commission.
+
+Parties: mean both the Licensee and the Licensor.
+
+These expressions may be used both in singular and plural form.
+
+
+    Article 2 - PURPOSE
+
+The purpose of the Agreement is the grant by the Licensor to the
+Licensee of a non-exclusive, transferable and worldwide license for the
+Software as set forth in Article 5 <#scope> hereinafter for the whole
+term of the protection granted by the rights over said Software.
+
+
+    Article 3 - ACCEPTANCE
+
+3.1 The Licensee shall be deemed as having accepted the terms and
+conditions of this Agreement upon the occurrence of the first of the
+following events:
+
+  * (i) loading the Software by any or all means, notably, by
+    downloading from a remote server, or by loading from a physical medium;
+  * (ii) the first time the Licensee exercises any of the rights granted
+    hereunder.
+
+3.2 One copy of the Agreement, containing a notice relating to the
+characteristics of the Software, to the limited warranty, and to the
+fact that its use is restricted to experienced users has been provided
+to the Licensee prior to its acceptance as set forth in Article 3.1
+<#accepting> hereinabove, and the Licensee hereby acknowledges that it
+has read and understood it.
+
+
+    Article 4 - EFFECTIVE DATE AND TERM
+
+
+      4.1 EFFECTIVE DATE
+
+The Agreement shall become effective on the date when it is accepted by
+the Licensee as set forth in Article 3.1 <#accepting>.
+
+
+      4.2 TERM
+
+The Agreement shall remain in force for the entire legal term of
+protection of the economic rights over the Software.
+
+
+    Article 5 - SCOPE OF RIGHTS GRANTED
+
+The Licensor hereby grants to the Licensee, who accepts, the following
+rights over the Software for any or all use, and for the term of the
+Agreement, on the basis of the terms and conditions set forth hereinafter.
+
+Besides, if the Licensor owns or comes to own one or more patents
+protecting all or part of the functions of the Software or of its
+components, the Licensor undertakes not to enforce the rights granted by
+these patents against successive Licensees using, exploiting or
+modifying the Software. If these patents are transferred, the Licensor
+undertakes to have the transferees subscribe to the obligations set
+forth in this paragraph.
+
+
+      5.1 RIGHT OF USE
+
+The Licensee is authorized to use the Software, without any limitation
+as to its fields of application, with it being hereinafter specified
+that this comprises:
+
+ 1. permanent or temporary reproduction of all or part of the Software
+    by any or all means and in any or all form.
+
+ 2. loading, displaying, running, or storing the Software on any or all
+    medium.
+
+ 3. entitlement to observe, study or test its operation so as to
+    determine the ideas and principles behind any or all constituent
+    elements of said Software. This shall apply when the Licensee
+    carries out any or all loading, displaying, running, transmission or
+    storage operation as regards the Software, that it is entitled to
+    carry out hereunder.
+
+
+      5.2 ENTITLEMENT TO MAKE CONTRIBUTIONS
+
+The right to make Contributions includes the right to translate, adapt,
+arrange, or make any or all modifications to the Software, and the right
+to reproduce the resulting software.
+
+The Licensee is authorized to make any or all Contributions to the
+Software provided that it includes an explicit notice that it is the
+author of said Contribution and indicates the date of the creation thereof.
+
+
+      5.3 RIGHT OF DISTRIBUTION
+
+In particular, the right of distribution includes the right to publish,
+transmit and communicate the Software to the general public on any or
+all medium, and by any or all means, and the right to market, either in
+consideration of a fee, or free of charge, one or more copies of the
+Software by any means.
+
+The Licensee is further authorized to distribute copies of the modified
+or unmodified Software to third parties according to the terms and
+conditions set forth hereinafter.
+
+
+        5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION
+
+The Licensee is authorized to distribute true copies of the Software in
+Source Code or Object Code form, provided that said distribution
+complies with all the provisions of the Agreement and is accompanied by:
+
+ 1. a copy of the Agreement,
+
+ 2. a notice relating to the limitation of both the Licensor's warranty
+    and liability as set forth in Articles 8 and 9,
+
+and that, in the event that only the Object Code of the Software is
+redistributed, the Licensee allows effective access to the full Source
+Code of the Software for a period of at least three years from the
+distribution of the Software, it being understood that the additional
+acquisition cost of the Source Code shall not exceed the cost of the
+data transfer.
+
+
+        5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE
+
+When the Licensee makes a Contribution to the Software, the terms and
+conditions for the distribution of the resulting Modified Software
+become subject to all the provisions of this Agreement.
+
+The Licensee is authorized to distribute the Modified Software, in
+source code or object code form, provided that said distribution
+complies with all the provisions of the Agreement and is accompanied by:
+
+ 1. a copy of the Agreement,
+
+ 2. a notice relating to the limitation of both the Licensor's warranty
+    and liability as set forth in Articles 8 and 9,
+
+and, in the event that only the object code of the Modified Software is
+redistributed,
+
+ 3. a note stating the conditions of effective access to the full source
+    code of the Modified Software for a period of at least three years
+    from the distribution of the Modified Software, it being understood
+    that the additional acquisition cost of the source code shall not
+    exceed the cost of the data transfer.
+
+
+        5.3.3 DISTRIBUTION OF EXTERNAL MODULES
+
+When the Licensee has developed an External Module, the terms and
+conditions of this Agreement do not apply to said External Module, that
+may be distributed under a separate license agreement.
+
+
+        5.3.4 COMPATIBILITY WITH OTHER LICENSES
+
+The Licensee can include a code that is subject to the provisions of one
+of the versions of the GNU GPL, GNU Affero GPL and/or EUPL in the
+Modified or unmodified Software, and distribute that entire code under
+the terms of the same version of the GNU GPL, GNU Affero GPL and/or EUPL.
+
+The Licensee can include the Modified or unmodified Software in a code
+that is subject to the provisions of one of the versions of the GNU GPL,
+GNU Affero GPL and/or EUPL and distribute that entire code under the
+terms of the same version of the GNU GPL, GNU Affero GPL and/or EUPL.
+
+
+    Article 6 - INTELLECTUAL PROPERTY
+
+
+      6.1 OVER THE INITIAL SOFTWARE
+
+The Holder owns the economic rights over the Initial Software. Any or
+all use of the Initial Software is subject to compliance with the terms
+and conditions under which the Holder has elected to distribute its work
+and no one shall be entitled to modify the terms and conditions for the
+distribution of said Initial Software.
+
+The Holder undertakes that the Initial Software will remain ruled at
+least by this Agreement, for the duration set forth in Article 4.2 <#term>.
+
+
+      6.2 OVER THE CONTRIBUTIONS
+
+The Licensee who develops a Contribution is the owner of the
+intellectual property rights over this Contribution as defined by
+applicable law.
+
+
+      6.3 OVER THE EXTERNAL MODULES
+
+The Licensee who develops an External Module is the owner of the
+intellectual property rights over this External Module as defined by
+applicable law and is free to choose the type of agreement that shall
+govern its distribution.
+
+
+      6.4 JOINT PROVISIONS
+
+The Licensee expressly undertakes:
+
+ 1. not to remove, or modify, in any manner, the intellectual property
+    notices attached to the Software;
+
+ 2. to reproduce said notices, in an identical manner, in the copies of
+    the Software modified or not.
+
+The Licensee undertakes not to directly or indirectly infringe the
+intellectual property rights on the Software of the Holder and/or
+Contributors, and to take, where applicable, vis-Ã -vis its staff, any
+and all measures required to ensure respect of said intellectual
+property rights of the Holder and/or Contributors.
+
+
+    Article 7 - RELATED SERVICES
+
+7.1 Under no circumstances shall the Agreement oblige the Licensor to
+provide technical assistance or maintenance services for the Software.
+
+However, the Licensor is entitled to offer this type of services. The
+terms and conditions of such technical assistance, and/or such
+maintenance, shall be set forth in a separate instrument. Only the
+Licensor offering said maintenance and/or technical assistance services
+shall incur liability therefor.
+
+7.2 Similarly, any Licensor is entitled to offer to its licensees, under
+its sole responsibility, a warranty, that shall only be binding upon
+itself, for the redistribution of the Software and/or the Modified
+Software, under terms and conditions that it is free to decide. Said
+warranty, and the financial terms and conditions of its application,
+shall be subject of a separate instrument executed between the Licensor
+and the Licensee.
+
+
+    Article 8 - LIABILITY
+
+8.1 Subject to the provisions of Article 8.2, the Licensee shall be
+entitled to claim compensation for any direct loss it may have suffered
+from the Software as a result of a fault on the part of the relevant
+Licensor, subject to providing evidence thereof.
+
+8.2 The Licensor's liability is limited to the commitments made under
+this Agreement and shall not be incurred as a result of in particular:
+(i) loss due the Licensee's total or partial failure to fulfill its
+obligations, (ii) direct or consequential loss that is suffered by the
+Licensee due to the use or performance of the Software, and (iii) more
+generally, any consequential loss. In particular the Parties expressly
+agree that any or all pecuniary or business loss (i.e. loss of data,
+loss of profits, operating loss, loss of customers or orders,
+opportunity cost, any disturbance to business activities) or any or all
+legal proceedings instituted against the Licensee by a third party,
+shall constitute consequential loss and shall not provide entitlement to
+any or all compensation from the Licensor.
+
+
+    Article 9 - WARRANTY
+
+9.1 The Licensee acknowledges that the scientific and technical
+state-of-the-art when the Software was distributed did not enable all
+possible uses to be tested and verified, nor for the presence of
+possible defects to be detected. In this respect, the Licensee's
+attention has been drawn to the risks associated with loading, using,
+modifying and/or developing and reproducing the Software which are
+reserved for experienced users.
+
+The Licensee shall be responsible for verifying, by any or all means,
+the suitability of the product for its requirements, its good working
+order, and for ensuring that it shall not cause damage to either persons
+or properties.
+
+9.2 The Licensor hereby represents, in good faith, that it is entitled
+to grant all the rights over the Software (including in particular the
+rights set forth in Article 5 <#scope>).
+
+9.3 The Licensee acknowledges that the Software is supplied "as is" by
+the Licensor without any other express or tacit warranty, other than
+that provided for in Article 9.2 <#good-faith> and, in particular,
+without any warranty as to its commercial value, its secured, safe,
+innovative or relevant nature.
+
+Specifically, the Licensor does not warrant that the Software is free
+from any error, that it will operate without interruption, that it will
+be compatible with the Licensee's own equipment and software
+configuration, nor that it will meet the Licensee's requirements.
+
+9.4 The Licensor does not either expressly or tacitly warrant that the
+Software does not infringe any third party intellectual property right
+relating to a patent, software or any other property right. Therefore,
+the Licensor disclaims any and all liability towards the Licensee
+arising out of any or all proceedings for infringement that may be
+instituted in respect of the use, modification and redistribution of the
+Software. Nevertheless, should such proceedings be instituted against
+the Licensee, the Licensor shall provide it with technical and legal
+expertise for its defense. Such technical and legal expertise shall be
+decided on a case-by-case basis between the relevant Licensor and the
+Licensee pursuant to a memorandum of understanding. The Licensor
+disclaims any and all liability as regards the Licensee's use of the
+name of the Software. No warranty is given as regards the existence of
+prior rights over the name of the Software or as regards the existence
+of a trademark.
+
+
+    Article 10 - TERMINATION
+
+10.1 In the event of a breach by the Licensee of its obligations
+hereunder, the Licensor may automatically terminate this Agreement
+thirty (30) days after notice has been sent to the Licensee and has
+remained ineffective.
+
+10.2 A Licensee whose Agreement is terminated shall no longer be
+authorized to use, modify or distribute the Software. However, any
+licenses that it may have granted prior to termination of the Agreement
+shall remain valid subject to their having been granted in compliance
+with the terms and conditions hereof.
+
+
+    Article 11 - MISCELLANEOUS
+
+
+      11.1 EXCUSABLE EVENTS
+
+Neither Party shall be liable for any or all delay, or failure to
+perform the Agreement, that may be attributable to an event of force
+majeure, an act of God or an outside cause, such as defective
+functioning or interruptions of the electricity or telecommunications
+networks, network paralysis following a virus attack, intervention by
+government authorities, natural disasters, water damage, earthquakes,
+fire, explosions, strikes and labor unrest, war, etc.
+
+11.2 Any failure by either Party, on one or more occasions, to invoke
+one or more of the provisions hereof, shall under no circumstances be
+interpreted as being a waiver by the interested Party of its right to
+invoke said provision(s) subsequently.
+
+11.3 The Agreement cancels and replaces any or all previous agreements,
+whether written or oral, between the Parties and having the same
+purpose, and constitutes the entirety of the agreement between said
+Parties concerning said purpose. No supplement or modification to the
+terms and conditions hereof shall be effective as between the Parties
+unless it is made in writing and signed by their duly authorized
+representatives.
+
+11.4 In the event that one or more of the provisions hereof were to
+conflict with a current or future applicable act or legislative text,
+said act or legislative text shall prevail, and the Parties shall make
+the necessary amendments so as to comply with said act or legislative
+text. All other provisions shall remain effective. Similarly, invalidity
+of a provision of the Agreement, for any reason whatsoever, shall not
+cause the Agreement as a whole to be invalid.
+
+
+      11.5 LANGUAGE
+
+The Agreement is drafted in both French and English and both versions
+are deemed authentic.
+
+
+    Article 12 - NEW VERSIONS OF THE AGREEMENT
+
+12.1 Any person is authorized to duplicate and distribute copies of this
+Agreement.
+
+12.2 So as to ensure coherence, the wording of this Agreement is
+protected and may only be modified by the authors of the License, who
+reserve the right to periodically publish updates or new versions of the
+Agreement, each with a separate number. These subsequent versions may
+address new issues encountered by Free Software.
+
+12.3 Any Software distributed under a given version of the Agreement may
+only be subsequently distributed under the same version of the Agreement
+or a subsequent version, subject to the provisions of Article 5.3.4
+<#compatibility>.
+
+
+    Article 13 - GOVERNING LAW AND JURISDICTION
+
+13.1 The Agreement is governed by French law. The Parties agree to
+endeavor to seek an amicable solution to any disagreements or disputes
+that may arise during the performance of the Agreement.
+
+13.2 Failing an amicable solution within two (2) months as from their
+occurrence, and unless emergency proceedings are necessary, the
+disagreements or disputes shall be referred to the Paris Courts having
+jurisdiction, by the more diligent Party.

From 7e5359b12f68fea5d94630eb73f2a2a6c8d3059a Mon Sep 17 00:00:00 2001
From: Gutman Tom <tgutman@shdli007.curie.fr>
Date: Tue, 28 Apr 2020 00:47:44 +0200
Subject: [PATCH 04/15] add --minAltDepth + pylint

---
 bin/pyTMB.py | 311 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 181 insertions(+), 130 deletions(-)

diff --git a/bin/pyTMB.py b/bin/pyTMB.py
index fff69c7..700f36f 100644
--- a/bin/pyTMB.py
+++ b/bin/pyTMB.py
@@ -14,7 +14,7 @@
 #
 ##############################################################################
 
-__version__='1.1.0'
+__version__ = '1.1.0'
 
 """
 This script is designed to calculate a TMB score from a VCF file.
@@ -32,7 +32,15 @@
 --effGenomeSize 1590000 > TMB_results.log
 """
 
-import cyvcf2 
+
+"""
+Load yaml file
+"""
+
+
+
+
+import cyvcf2
 import argparse
 import sys
 import warnings
@@ -40,12 +48,6 @@
 import yaml
 import os.path
 from datetime import date
-
-
-
-"""
-Load yaml file
-"""
 def loadConfig(infile):
     with open(infile, 'r') as stream:
         try:
@@ -53,14 +55,17 @@ def loadConfig(infile):
         except:
             raise
 
+
 """
 Calculate Effective Genome Size from a BED file
 """
+
+
 def getEffGenomeSizeFromBed(infile, verbose=False):
 
     if verbose:
         print("## Loading BED file '", infile, "'...")
-        
+
     bedhandle = open(infile)
     effgs = 0
     nline = 0
@@ -70,12 +75,12 @@ def getEffGenomeSizeFromBed(infile, verbose=False):
         try:
             chromosome, start, end = bedtab[:3]
         except ValueError:
-            sys.stderr.write("Error : wrong input format in line", nline,". Not a BED file !?")
+            sys.stderr.write("Error : wrong input format in line", nline, ". Not a BED file !?")
             sys.exit(-1)
 
         intl = abs(int(end) - int(start))
         effgs += intl
-    
+
     bedhandle.close()
     return effgs
 
@@ -83,22 +88,24 @@ def getEffGenomeSizeFromBed(infile, verbose=False):
 """
 Check if a variant has the provided annotation flags
 """
+
+
 def isAnnotatedAs(v, infos, flags, sep):
 
-    ## Subset annotation information as list
+    # Subset annotation information as list
     subINFO = subsetINFO(infos, keys=flags.keys())
 
-    ## Compare list variant information and expected list of tags
+    # Compare list variant information and expected list of tags
     for sub in subINFO:
-        ## For all keys
+        # For all keys
         for k in flags.keys():
-            ## For each value in keys
+            # For each value in keys
             for val in flags[k]:
-                ## Use search to deal with multiple annotations
+                # Use search to deal with multiple annotations
                 for subval in sub[k].split(sep):
                     if val == subval:
                         return(True)
-                #else:
+                # else:
                 #    if re.search(val, sub[k]):
                 #        return(True)
     return(False)
@@ -107,6 +114,8 @@ def isAnnotatedAs(v, infos, flags, sep):
 """
 Check if a variant is in a genomeDb with a MAF > val
 """
+
+
 def isPolym(v, infos, flags, val):
 
     subINFO = subsetINFO(infos, keys=flags)
@@ -116,9 +125,12 @@ def isPolym(v, infos, flags, val):
                 return True
     return(False)
 
-"""                                                                                                                                                                                                         
+
+"""
 Check if a variant is annotated as a cancer hotspot
 """
+
+
 def isCancerHotspot(v, infos, flags):
     subINFO = subsetINFO(infos, keys=flags)
     for key in subINFO:
@@ -126,13 +138,16 @@ def isCancerHotspot(v, infos, flags):
             return True
     return(False)
 
+
 """
 Subset the annotation information to a few key values
 """
+
+
 def subsetINFO(annot, keys):
 
     if isinstance(annot, list):
-        subsetInfo=[]
+        subsetInfo = []
         for i in range(0, len(annot)):
             z = dict((k, annot[i][k]) for k in keys if k in annot[i])
             if len(z) > 0:
@@ -142,93 +157,119 @@ def subsetINFO(annot, keys):
 
     return(subsetInfo)
 
+
 """
 Format the INFO field from snpEff and return a list of dict
 ie. snpEff
 """
+
+
 def infoTag2dl(INFO):
 
     if INFO is not None:
-        annotTag = INFO.split(',')                                                                                                                                       
-        annotInfo=[]                                                                                                                                                                                 
-        for i in range(0, len(annotTag)):                                                                                                                                                            
-            annot=annotTag[i].split('|')                                                                                                                                                             
-            dictannot = {i : annot[i] for i in range(0, len(annot))}                                                                                                                                 
-            annotInfo.append(dictannot)                                                                                                                                                              
+        annotTag = INFO.split(',')
+        annotInfo = []
+        for i in range(0, len(annotTag)):
+            annot = annotTag[i].split('|')
+            dictannot = {i: annot[i] for i in range(0, len(annot))}
+            annotInfo.append(dictannot)
         return(annotInfo)
 
+
 """
 Format the INFO field from ANNOVAR and return a list of dict
 ie. annovar
 """
+
+
 def info2dl(INFO):
 
     if INFO is not None:
         return [dict(INFO)]
 
+
 """
 Get a tag value from either the format field or the info field
 """
+
+
 def getTag(v, tag):
 
-    ## First check in FORMAT field
+    # First check in FORMAT field
     if tag in variant.FORMAT:
-        val=variant.format(tag)
+        val = variant.format(tag)
         if float(val) < 0:
-            val=None
+            val = None
 
-    ## Otherwise, check in INFO field
+    # Otherwise, check in INFO field
     if tag not in variant.FORMAT or val is None:
-        val=variant.INFO.get(tag)
+        val = variant.INFO.get(tag)
 
     if val is not None:
-        val=float(val)
+        val = float(val)
 
     return(val)
 
+
 """
 Parse inputs
 """
+
+
 def argsParse():
     parser = argparse.ArgumentParser()
     parser.add_argument("-i", "--vcf", help="Input file (.vcf, .vcf.gz, .bcf)")
 
-    ## Configs
-    parser.add_argument("--dbConfig", help="Databases config file", default="./config/databases.yml")
-    parser.add_argument("--varConfig", help="Variant calling config file", default="./config/calling.yml")
- 
-    ## Efective genome size
+    # Configs
+    parser.add_argument("--dbConfig", help="Databases config file",
+                        default="./config/databases.yml")
+    parser.add_argument("--varConfig", help="Variant calling config file",
+                        default="./config/calling.yml")
+
+    # Efective genome size
     parser.add_argument("--effGenomeSize", help="Effective genome size", type=int, default=None)
-    parser.add_argument("--bed", help="Capture design to use if effGenomeSize is not defined (BED file)", default=None)
-    
-    ## Thresholds
-    parser.add_argument("--minVAF", help="Filter variants with Allelic Ratio < minVAF", type=float, default=0.05)
-    parser.add_argument("--minMAF", help="Filter variants with MAF < minMAF", type=float, default=0.001)
-    parser.add_argument("--minDepth", help="Filter variants with depth < minDepth", type=int, default=5)
-    
-    ## Which variants to use
-    parser.add_argument("--filterLowQual", help="Filter low quality (i.e not PASS) variant", action="store_true")
+    parser.add_argument(
+        "--bed", help="Capture design to use if effGenomeSize is not defined (BED file)", default=None)
+
+    # Thresholds
+    parser.add_argument(
+        "--minVAF", help="Filter variants with Allelic Ratio < minVAF", type=float, default=0.05)
+    parser.add_argument("--minMAF", help="Filter variants with MAF < minMAF",
+                        type=float, default=0.001)
+    parser.add_argument(
+        "--minDepth", help="Filter variants with depth < minDepth", type=int, default=5)
+    parser.add_argument(
+        "--minAltDepth", help="Filter variants with alternative allele depth < minAltDepth", type=int, default=3)
+
+    # Which variants to use
+    parser.add_argument("--filterLowQual",
+                        help="Filter low quality (i.e not PASS) variant", action="store_true")
     parser.add_argument("--filterIndels", help="Filter insertions/deletions", action="store_true")
     parser.add_argument("--filterCoding", help="Filter Coding variants", action="store_true")
     parser.add_argument("--filterSplice", help="Filter Splice variants", action="store_true")
     parser.add_argument("--filterNonCoding", help="Filter Non-coding variants", action="store_true")
     parser.add_argument("--filterSyn", help="Filter Synonymous variants", action="store_true")
-    parser.add_argument("--filterNonSyn", help="Filter Non-Synonymous variants", action="store_true")
-    parser.add_argument("--filterCancerHotspot", help="Filter variants annotated as cancer hotspots", action="store_true")
-    parser.add_argument("--filterPolym", help="Filter polymorphism variants in genome databases. See --minMAF", action="store_true")
-    parser.add_argument("--filterRecurrence", help="Filter on recurrence values", action="store_true")
-    
-    ## Databases
-    parser.add_argument("--polymDb", help="Databases used for polymorphisms detection (comma separated)", default="gnomad")
-    parser.add_argument("--cancerDb", help="Databases used for cancer hotspot annotation (comma separated)", default="cosmic")
-    
-    ## Others
+    parser.add_argument("--filterNonSyn", help="Filter Non-Synonymous variants",
+                        action="store_true")
+    parser.add_argument("--filterCancerHotspot",
+                        help="Filter variants annotated as cancer hotspots", action="store_true")
+    parser.add_argument(
+        "--filterPolym", help="Filter polymorphism variants in genome databases. See --minMAF", action="store_true")
+    parser.add_argument("--filterRecurrence",
+                        help="Filter on recurrence values", action="store_true")
+
+    # Databases
+    parser.add_argument(
+        "--polymDb", help="Databases used for polymorphisms detection (comma separated)", default="gnomad")
+    parser.add_argument(
+        "--cancerDb", help="Databases used for cancer hotspot annotation (comma separated)", default="cosmic")
+
+    # Others
     parser.add_argument("--verbose", action="store_true")
     parser.add_argument("--debug", action="store_true")
     parser.add_argument("--export", help="", action="store_true")
     parser.add_argument("--version", action='version', version="%(prog)s ("+__version__+")")
 
-    
     args = parser.parse_args()
     return (args)
 
@@ -237,17 +278,18 @@ def argsParse():
 
     args = argsParse()
 
-    ## Loading Data
+    # Loading Data
     vcf = cyvcf2.VCF(args.vcf)
 
     if args.export:
-        wx = cyvcf2.Writer(re.sub(r'\.vcf$|\.vcf.gz$|\.bcf', '_export.vcf', os.path.basename(args.vcf)), vcf)
+        wx = cyvcf2.Writer(re.sub(r'\.vcf$|\.vcf.gz$|\.bcf',
+                                  '_export.vcf', os.path.basename(args.vcf)), vcf)
 
     if args.debug:
         vcf.add_info_to_header({'ID': 'TMB_FILTERS', 'Description': 'Detected filters for TMB calculation',
-                                'Type':'Character', 'Number': '1'})                                                                                                                                      
-        wd = cyvcf2.Writer(re.sub(r'\.vcf$|\.vcf.gz$|\.bcf', '_debug.vcf', os.path.basename(args.vcf)), vcf)
-
+                                'Type': 'Character', 'Number': '1'})
+        wd = cyvcf2.Writer(re.sub(r'\.vcf$|\.vcf.gz$|\.bcf',
+                                  '_debug.vcf', os.path.basename(args.vcf)), vcf)
 
     dbFlags = loadConfig(args.dbConfig)
     callerFlags = loadConfig(args.varConfig)
@@ -259,122 +301,131 @@ def argsParse():
         if args.bed is not None:
             effGS = getEffGenomeSizeFromBed(args.bed)
         else:
-            sys.stderr.write("Error: Effective Genome Size not specified. See --effGenomeSize or --bed")
+            sys.stderr.write(
+                "Error: Effective Genome Size not specified. See --effGenomeSize or --bed")
             sys.exit(-1)
     else:
         effGS = args.effGenomeSize
-    
+
     for variant in vcf:
-        varCounter+=1
+        varCounter += 1
         if (varCounter % 1000 == 0 and args.verbose):
-            print ("## ",varCounter)
-            #if args.debug and varCounter == 100000:
+            print ("## ", varCounter)
+            # if args.debug and varCounter == 100000:
             #    sys.exit()
 
         try:
-            ## All vcf INFO
-            dbInfo=dict(variant.INFO)
-            debugInfo=""
+            # All vcf INFO
+            dbInfo = dict(variant.INFO)
+            debugInfo = ""
 
-            ## Get annotation INFO as a list of dict
+            # Get annotation INFO as a list of dict
             if dbFlags['tag'] != '':
                 annotInfo = infoTag2dl(variant.INFO.get(dbFlags['tag']))
             else:
                 annotInfo = info2dl(variant.INFO)
-            
-            ## Field separator
-            sep = dbFlags['sep'] 
 
-            ## No INFO field
+            # Field separator
+            sep = dbFlags['sep']
+
+            # No INFO field
             if dbInfo is None or annotInfo is None:
                 continue
-            
-            ## Indels
+
+            # Indels
             if args.filterIndels and variant.is_indel:
-                debugInfo=",".join([debugInfo, "INDEL"])
-                if not args.debug: 
+                debugInfo = ",".join([debugInfo, "INDEL"])
+                if not args.debug:
                     continue
 
-            ## Variant Allele Frequency
-            fval=getTag(variant, callerFlags['freq'])
+            # Variant Allele Frequency
+            fval = getTag(variant, callerFlags['freq'])
             if fval is not None and fval < args.minVAF:
-                debugInfo=",".join([debugInfo, "VAF"])
-                if not args.debug: 
+                debugInfo = ",".join([debugInfo, "VAF"])
+                if not args.debug:
                     continue
-                
-            ## Sequencing Depth
-            dval=getTag(variant, callerFlags['depth'])
+
+            # Sequencing Depth
+            dval = getTag(variant, callerFlags['depth'])
             if dval is not None and dval < args.minDepth:
-                debugInfo=",".join([debugInfo, "DEPTH"])
-                if not args.debug: 
+                debugInfo = ",".join([debugInfo, "DEPTH"])
+                if not args.debug:
+                    continue
+
+            # Alternative allele Depth
+            ad = variant.format('AD')
+            if ad is not None and len(ad[0]) == 2 and ad[0][1] < args.minAltDepth:
+                print(ad)
+                debugInfo = ",".join([debugInfo, "ALTDEPTH"])
+                if not args.debug:
                     continue
 
-            ## Variant has a QUAL value or not PASS in the FILTER column
+            # Variant has a QUAL value or not PASS in the FILTER column
             if args.filterLowQual and (variant.QUAL is not None or variant.FILTER is not None):
-                debugInfo=",".join([debugInfo, "QUAL"])
-                if not args.debug: 
+                debugInfo = ",".join([debugInfo, "QUAL"])
+                if not args.debug:
                     continue
- 
-            ## Coding variants
+
+            # Coding variants
             if args.filterCoding and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isCoding'], sep=sep):
-                debugInfo=",".join([debugInfo, "CODING"])
-                if not args.debug: 
+                debugInfo = ",".join([debugInfo, "CODING"])
+                if not args.debug:
                     continue
 
-            ## Splice variants
+            # Splice variants
             if args.filterSplice and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isSplicing'], sep=sep):
-                debugInfo=",".join([debugInfo, "SPLICING"])
-                if not args.debug: 
+                debugInfo = ",".join([debugInfo, "SPLICING"])
+                if not args.debug:
                     continue
 
-            ## Non-coding variants
+            # Non-coding variants
             if args.filterNonCoding and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isNonCoding'], sep=sep):
-                debugInfo=",".join([debugInfo, "NONCODING"])
-                if not args.debug: 
-                    continue 
-                
-            ## Synonymous
+                debugInfo = ",".join([debugInfo, "NONCODING"])
+                if not args.debug:
+                    continue
+
+            # Synonymous
             if args.filterSyn and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isSynonymous'], sep=sep):
-                debugInfo=",".join([debugInfo, "SYN"])
-                if not args.debug: 
+                debugInfo = ",".join([debugInfo, "SYN"])
+                if not args.debug:
                     continue
-            
-            ## Non synonymous
+
+            # Non synonymous
             if args.filterNonSyn and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isNonSynonymous'], sep=sep):
-                debugInfo=",".join([debugInfo, "NON_SYN"])
-                if not args.debug: 
+                debugInfo = ",".join([debugInfo, "NON_SYN"])
+                if not args.debug:
                     continue
 
-            ## Hotpost
+            # Hotpost
             if args.filterCancerHotspot:
-                ## Flatten list of fields
-                fdb=[]
+                # Flatten list of fields
+                fdb = []
                 for db in args.cancerDb.split(','):
                     for x in dbFlags['cancerDb'][db]:
                         fdb.append(x)
- 
+
                 if isCancerHotspot(variant, infos=dbInfo, flags=fdb):
-                    debugInfo=",".join([debugInfo, "HOTSPOT"])
-                    if not args.debug: 
+                    debugInfo = ",".join([debugInfo, "HOTSPOT"])
+                    if not args.debug:
                         continue
 
-            ## Polymorphisms
+            # Polymorphisms
             if args.filterPolym:
-                ## Flatten list of fields
-                fdb=[]
+                # Flatten list of fields
+                fdb = []
                 for db in args.polymDb.split(','):
                     for x in dbFlags['polymDb'][db]:
                         fdb.append(x)
 
                 if isPolym(variant, infos=dbInfo, flags=fdb, val=args.minMAF):
-                    debugInfo=",".join([debugInfo, "POLYM"])
-                    if not args.debug: 
+                    debugInfo = ",".join([debugInfo, "POLYM"])
+                    if not args.debug:
                         continue
 
-            ## Recurrence
+            # Recurrence
             if args.filterRecurrence:
                 if isPolym(variant, infos=dbInfo, flags=dbFlags['recurrence']['run'], val=0):
-                    debugInfo=",".join([debugInfo, "RUNREC"])
+                    debugInfo = ",".join([debugInfo, "RUNREC"])
                     if not args.debug:
                         continue
 
@@ -383,14 +434,14 @@ def argsParse():
             warnings.warn("Warning : variant ", warnflag, " raises an error. Skipped so far ...")
             raise
 
-        ## Still alive
-        if debugInfo=="":
+        # Still alive
+        if debugInfo == "":
             varTMB += 1
             if args.export:
-              wx.write_record(variant)  
+                wx.write_record(variant)
 
         if args.debug:
-            variant.INFO["TMB_FILTERS"]=re.sub(r'^,', '', debugInfo)
+            variant.INFO["TMB_FILTERS"] = re.sub(r'^,', '', debugInfo)
             wd.write_record(variant)
 
     if args.export:
@@ -399,10 +450,10 @@ def argsParse():
         wd.close()
     vcf.close()
 
-    ## Calculate TMB
+    # Calculate TMB
     TMB = round(float(varTMB)/(float(effGS)/1e6), 2)
 
-    ## Output
+    # Output
     print("pyTMB version=", __version__)
     print("When=", date.today())
     print("")
@@ -415,7 +466,7 @@ def argsParse():
     print("minMAF=", args.minMAF)
     print("minDepth=", args.minDepth)
     print("filterLowQual=", args.filterLowQual)
-    print("filterIndels=", args.filterIndels) 
+    print("filterIndels=", args.filterIndels)
     print("filterCoding=", args.filterCoding)
     print("filterNonCoding=", args.filterNonCoding)
     print("filterSplice=", args.filterSplice)
@@ -428,4 +479,4 @@ def argsParse():
     print("Variants after filters=", varTMB)
     print("Effective Genome Size=", effGS)
     print("")
-    print("TMB=",TMB)
+    print("TMB=", TMB)

From c55098e4ff98c8f5ed6894de6b1dcffbbe6bc4d2 Mon Sep 17 00:00:00 2001
From: Gutman Tom <tgutman@shbli02f.curie.fr>
Date: Wed, 29 Apr 2020 01:21:47 +0200
Subject: [PATCH 05/15] modify getTag function & alt allele depth command

---
 bin/pyTMB.py       | 15 +++++++++------
 config/mutect2.yml |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/bin/pyTMB.py b/bin/pyTMB.py
index 7291292..2bff470 100644
--- a/bin/pyTMB.py
+++ b/bin/pyTMB.py
@@ -14,7 +14,7 @@
 #
 ##############################################################################
 
-__version__='1.2.0'
+__version__ = '1.2.0'
 
 """
 This script is designed to calculate a TMB score from a VCF file.
@@ -46,6 +46,7 @@
 import warnings
 import re
 import yaml
+import numpy as np
 import os.path
 from datetime import date
 def loadConfig(infile):
@@ -198,15 +199,17 @@ def getTag(v, tag):
     # First check in FORMAT field
     if tag in variant.FORMAT:
         val = variant.format(tag)
-        if float(val) < 0:
-            val = None
+        if np.shape(val) == (1, 1):
+            if val < 0:
+                val = None
 
     # Otherwise, check in INFO field
     if tag not in variant.FORMAT or val is None:
         val = variant.INFO.get(tag)
 
-    if val is not None:
-        val = float(val)
+    if np.shape(val) == (1, 1):
+        if val is not None:
+            val = float(val)
 
     return(val)
 
@@ -353,7 +356,7 @@ def argsParse():
                     continue
 
             # Alternative allele Depth
-            ad = variant.format('AD')
+            ad = getTag(variant, callerFlags['altDepth'])
             if ad is not None and len(ad[0]) == 2 and ad[0][1] < args.minAltDepth:
                 debugInfo = ",".join([debugInfo, "ALTDEPTH"])
                 if not args.debug:
diff --git a/config/mutect2.yml b/config/mutect2.yml
index 32bf22f..99e1450 100644
--- a/config/mutect2.yml
+++ b/config/mutect2.yml
@@ -6,3 +6,4 @@
 
 freq: 'AF'
 depth: 'DP'
+altDepth: 'AD'

From 5862cc5ceaa455d63531c4cd01f22764cbd8c9de Mon Sep 17 00:00:00 2001
From: Gutman Tom <tgutman@shdli007.curie.fr>
Date: Thu, 7 May 2020 02:02:18 +0200
Subject: [PATCH 06/15] hotfix minVAF

---
 bin/pyTMB.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/bin/pyTMB.py b/bin/pyTMB.py
index 2bff470..990ee92 100644
--- a/bin/pyTMB.py
+++ b/bin/pyTMB.py
@@ -343,6 +343,10 @@ def argsParse():
 
             # Variant Allele Frequency
             fval = getTag(variant, callerFlags['freq'])
+            if type(fval) == np.ndarray:
+                debugInfo = ",".join([debugInfo, "MULTI_VAF"])
+                if not args.debug:
+                    continue
             if fval is not None and fval < args.minVAF:
                 debugInfo = ",".join([debugInfo, "VAF"])
                 if not args.debug:

From f38c78e10cc2f811e1721ead07ebb62a19bb36bd Mon Sep 17 00:00:00 2001
From: nservant <nicolas.servant@curie.fr>
Date: Wed, 13 May 2020 23:19:17 +0200
Subject: [PATCH 07/15] [MODIF] first test for multiallelic case

---
 bin/pyTMB_test.py | 491 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 491 insertions(+)
 create mode 100644 bin/pyTMB_test.py

diff --git a/bin/pyTMB_test.py b/bin/pyTMB_test.py
new file mode 100644
index 0000000..9ffd268
--- /dev/null
+++ b/bin/pyTMB_test.py
@@ -0,0 +1,491 @@
+#!/usr/bin/env python
+# coding: utf-8
+#
+#  This file is part of pyTMB software.
+#
+#  Copyright (c) 2020 - Institut Curie
+#
+#  File author(s):
+#      Tom Gutman <tom.gutman@curie.fr>,
+#      Nicolas Servant <nicolas.servant@curie.fr>
+#
+#  Distributed under the terms of the CeCILL-B license.
+#  The full license is in the LICENSE file, distributed with this software.
+#
+##############################################################################
+
+__version__ = '1.2.0'
+
+"""
+This script is designed to calculate a TMB score from a VCF file.
+If no filters are specified, all variants will be used.
+
+Let's defined a TMB as a score using PASS, non-synonymous, coding, non polymorphism variants ...
+In this case, a typical usage would be :
+
+python pyTMB.py -i ${VCF} --minDepth 100 \
+--filterLowQual \
+--filterNonCoding \
+--filterSplice \
+--filterSyn \
+--filterPolym  --minMAF 0.001 --polymDb 1k,gnomad \
+--effGenomeSize 1590000 > TMB_results.log
+"""
+
+import cyvcf2
+import argparse
+import sys
+import warnings
+import re
+import yaml
+import numpy as np
+import os.path
+from datetime import date
+
+"""
+Load yaml file
+"""
+def loadConfig(infile):
+    with open(infile, 'r') as stream:
+        try:
+            return(yaml.safe_load(stream))
+        except:
+            raise
+
+
+def getMultiAlleleHeader(vcf):
+    FORMAT=[]
+    INFO=[]
+    for h in vcf.header_iter():
+        i = h.info(extra=True)
+        if 'Number' in i.keys() and i['Number'] == 'A':
+            if i['HeaderType'] == 'FORMAT':
+                FORMAT.append(i['ID'])
+            elif i['HeaderType'] == 'INFO':
+                INFO.append(i['ID'])
+    return(dict(FORMAT=FORMAT, INFO=INFO))
+
+"""
+Calculate Effective Genome Size from a BED file
+"""
+def getEffGenomeSizeFromBed(infile, verbose=False):
+
+    if verbose:
+        print("## Loading BED file '", infile, "'...")
+
+    bedhandle = open(infile)
+    effgs = 0
+    nline = 0
+    for line in bedhandle:
+        bedtab = line.strip().split("\t")
+        nline += 1
+        try:
+            chromosome, start, end = bedtab[:3]
+        except ValueError:
+            sys.stderr.write("Error : wrong input format in line", nline, ". Not a BED file !?")
+            sys.exit(-1)
+
+        intl = abs(int(end) - int(start))
+        effgs += intl
+
+    bedhandle.close()
+    return effgs
+
+
+"""
+Check if a variant has the provided annotation flags
+"""
+def isAnnotatedAs(v, infos, flags, sep):
+
+    # Subset annotation information as list
+    subINFO = subsetINFO(infos, keys=flags.keys())
+    # Compare list variant information and expected list of tags
+    for sub in subINFO:
+        # For all keys
+        for k in flags.keys():
+            # For each value in keys
+            for val in flags[k]:
+                # Use search to deal with multiple annotations
+                for subval in sub[k].split(sep):
+                    if val == subval:
+                        return(True)
+    return(False)
+
+
+"""
+Check if a variant is in a genomeDb with a MAF > val
+"""
+def isPolym(v, infos, flags, val):
+    subINFO = subsetINFO(infos, keys=flags)
+    for key in subINFO:
+        if type(subINFO[key]) is tuple:
+            for i in subINFO[key]:
+                if i is not None and float(i) >= float(val):
+                    return True    
+        elif subINFO[key] is not None and subINFO[key] != ".":
+            if subINFO[key] is not None and float(sk[i]) >= float(val):
+                return True
+    return(False)
+
+
+"""
+Check if a variant is annotated as a cancer hotspot
+"""
+def isCancerHotspot(v, infos, flags):
+    subINFO = subsetINFO(infos, keys=flags)
+    for key in subINFO:
+        if subINFO[key] is not None and subINFO[key] != ".":
+            return True
+    return(False)
+
+
+"""
+Subset the annotation information to a few key values
+"""
+def subsetINFO(annot, keys):
+
+    if isinstance(annot, list):
+        subsetInfo = []
+        for i in range(0, len(annot)):
+            z = dict((k, annot[i][k]) for k in keys if k in annot[i])
+            if len(z) > 0:
+                subsetInfo.append(z)
+    else:
+        subsetInfo = dict((k, annot[k]) for k in keys if k in annot)
+
+    return(subsetInfo)
+
+
+"""
+Format the INFO field from snpEff and return a list of dict
+ie. snpEff
+"""
+def infoTag2dl(INFO):
+
+    if INFO is not None:
+        annotTag = INFO.split(',')
+        annotInfo = []
+        for i in range(0, len(annotTag)):
+            annot = annotTag[i].split('|')
+            dictannot = {i: annot[i] for i in range(0, len(annot))}
+            annotInfo.append(dictannot)
+        return(annotInfo)
+
+
+"""
+Format the INFO field from ANNOVAR and return a list of dict
+ie. annovar
+"""
+def info2dl(INFO):
+
+    if INFO is not None:
+        return [dict(INFO)]
+
+
+"""
+Get a tag value from either the format field or the info field
+Return a 2D numpy array
+"""
+def getTag(v, tag):
+
+    # First check in FORMAT field
+    if tag in variant.FORMAT:
+        val = variant.format(tag)
+        #if np.shape(val) == (1, 1) and val < 0:
+        #        val = None
+
+    # Otherwise, check in INFO field
+    if tag not in variant.FORMAT or val is None:
+        val = variant.INFO.get(tag)
+
+    #if np.shape(val) == (1, 1) and val is not None:
+    #    val = float(val)
+
+    if type(val) != np.ndarray:
+        val = np.array([val], float)
+
+    return(val)
+
+
+"""
+Parse inputs
+"""
+def argsParse():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-i", "--vcf", help="Input file (.vcf, .vcf.gz, .bcf)")
+
+    # Configs
+    parser.add_argument("--dbConfig", help="Databases config file",
+                        default="./config/databases.yml")
+    parser.add_argument("--varConfig", help="Variant calling config file",
+                        default="./config/calling.yml")
+
+    # Efective genome size
+    parser.add_argument("--effGenomeSize", help="Effective genome size", type=int, default=None)
+    parser.add_argument(
+        "--bed", help="Capture design to use if effGenomeSize is not defined (BED file)", default=None)
+
+    # Thresholds
+    parser.add_argument(
+        "--minVAF", help="Filter variants with Allelic Ratio < minVAF", type=float, default=0.05)
+    parser.add_argument("--minMAF", help="Filter variants with MAF < minMAF",
+                        type=float, default=0.001)
+    parser.add_argument(
+        "--minDepth", help="Filter variants with depth < minDepth", type=int, default=5)
+    parser.add_argument(
+        "--minAltDepth", help="Filter variants with alternative allele depth < minAltDepth", type=int, default=3)
+
+    # Which variants to use
+    parser.add_argument("--filterLowQual",
+                        help="Filter low quality (i.e not PASS) variant", action="store_true")
+    parser.add_argument("--filterIndels", help="Filter insertions/deletions", action="store_true")
+    parser.add_argument("--filterCoding", help="Filter Coding variants", action="store_true")
+    parser.add_argument("--filterSplice", help="Filter Splice variants", action="store_true")
+    parser.add_argument("--filterNonCoding", help="Filter Non-coding variants", action="store_true")
+    parser.add_argument("--filterSyn", help="Filter Synonymous variants", action="store_true")
+    parser.add_argument("--filterNonSyn", help="Filter Non-Synonymous variants",
+                        action="store_true")
+    parser.add_argument("--filterCancerHotspot",
+                        help="Filter variants annotated as cancer hotspots", action="store_true")
+    parser.add_argument(
+        "--filterPolym", help="Filter polymorphism variants in genome databases. See --minMAF", action="store_true")
+    parser.add_argument("--filterRecurrence",
+                        help="Filter on recurrence values", action="store_true")
+
+    # Databases
+    parser.add_argument(
+        "--polymDb", help="Databases used for polymorphisms detection (comma separated)", default="gnomad")
+    parser.add_argument(
+        "--cancerDb", help="Databases used for cancer hotspot annotation (comma separated)", default="cosmic")
+
+    # Others
+    parser.add_argument("--verbose", action="store_true")
+    parser.add_argument("--debug", action="store_true")
+    parser.add_argument("--export", help="", action="store_true")
+    parser.add_argument("--version", action='version', version="%(prog)s ("+__version__+")")
+
+    args = parser.parse_args()
+    return (args)
+
+
+if __name__ == "__main__":
+
+    args = argsParse()
+
+    # Loading Data
+    vcf = cyvcf2.VCF(args.vcf)
+    #maHeader = getMultiAlleleHeader(vcf)
+    
+    if len(vcf.samples) > 1:
+        sys.stderr.write("Error: " + len(vcf.samples) + " sample detected. This version is designed for a single sample !")
+        sys.exit(-1)
+
+    if args.export:
+        wx = cyvcf2.Writer(re.sub(r'\.vcf$|\.vcf.gz$|\.bcf',
+                                  '_export.vcf', os.path.basename(args.vcf)), vcf)
+
+    if args.debug:
+        vcf.add_info_to_header({'ID': 'TMB_FILTERS', 'Description': 'Detected filters for TMB calculation',
+                                'Type': 'Character', 'Number': '1'})
+        wd = cyvcf2.Writer(re.sub(r'\.vcf$|\.vcf.gz$|\.bcf',
+                                  '_debug.vcf', os.path.basename(args.vcf)), vcf)
+
+    dbFlags = loadConfig(args.dbConfig)
+    callerFlags = loadConfig(args.varConfig)
+
+    varCounter = 0
+    varNI = 0
+    varTMB = 0
+
+    if args.effGenomeSize is None:
+        if args.bed is not None:
+            effGS = getEffGenomeSizeFromBed(args.bed)
+        else:
+            sys.stderr.write("Error: Effective Genome Size not specified. See --effGenomeSize or --bed")
+            sys.exit(-1)
+    else:
+        effGS = args.effGenomeSize
+
+    for variant in vcf:
+        varCounter += 1
+        if (varCounter % 1000 == 0 and args.verbose):
+            print ("## ", varCounter)
+            if args.debug and varCounter == 1000:
+                sys.exit()
+
+        try:
+            if len(variant.ALT) == 1:
+                continue
+
+            # All vcf INFO
+            dbInfo = dict(variant.INFO)
+            debugInfo = ""
+
+            # Get annotation INFO as a list of dict
+            if dbFlags['tag'] != '':
+                annotInfo = infoTag2dl(variant.INFO.get(dbFlags['tag']))
+            else:
+                annotInfo = info2dl(variant.INFO)
+
+            # No INFO field
+            if dbInfo is None or annotInfo is None:
+                varNI += 1
+                continue
+
+            # Indels
+            if args.filterIndels and variant.is_indel:
+                debugInfo = ",".join([debugInfo, "INDEL"])
+                if not args.debug:
+                    continue
+
+            # Variant has a QUAL value or not PASS in the FILTER column
+            if args.filterLowQual and (variant.QUAL is not None or variant.FILTER is not None):
+                debugInfo = ",".join([debugInfo, "QUAL"])
+                if not args.debug:
+                    continue
+
+            #######################
+            ## FORMAT
+            #######################
+
+            # Variant Allele Frequency
+            fval = getTag(variant, callerFlags['freq'])
+            if fval is not None and len(fval[fval < args.minVAF]) == len(variant.ALT):
+                debugInfo = ",".join([debugInfo, "VAF"])
+                if not args.debug:
+                    continue
+
+            # Sequencing Depth
+            dval = getTag(variant, callerFlags['depth'])
+            if dval is not None and len(dval[dval < args.minDepth]) == len(variant.ALT):
+                debugInfo = ",".join([debugInfo, "DEPTH"])
+                if not args.debug:
+                    continue
+
+            # Alternative allele Depth
+            ad = getTag(variant, callerFlags['altDepth'])
+            if ad is not None and len(ad[ad < args.minAltDepth]) == len(variant.ALT):
+                debugInfo = ",".join([debugInfo, "ALTDEPTH"])
+                if not args.debug:
+                    continue
+
+            ######################
+            ## Annotation INFO
+            ######################
+
+            # Coding variants
+            if args.filterCoding and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isCoding'], sep=dbFlags['sep']):
+                debugInfo = ",".join([debugInfo, "CODING"])
+                if not args.debug:
+                    continue
+
+            # Splice variants
+            if args.filterSplice and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isSplicing'], sep=dbFlags['sep']):
+                debugInfo = ",".join([debugInfo, "SPLICING"])
+                if not args.debug:
+                    continue
+
+            # Non-coding variants
+            if args.filterNonCoding and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isNonCoding'], sep=dbFlags['sep']):
+                debugInfo = ",".join([debugInfo, "NONCODING"])
+                if not args.debug:
+                    continue
+
+            # Synonymous
+            if args.filterSyn and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isSynonymous'], sep=dbFlags['sep']):
+                debugInfo = ",".join([debugInfo, "SYN"])
+                if not args.debug:
+                    continue
+
+            # Non synonymous
+            if args.filterNonSyn and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isNonSynonymous'], sep=dbFlags['sep']):
+                debugInfo = ",".join([debugInfo, "NON_SYN"])
+                if not args.debug:
+                    continue
+
+            # Hotspot
+            if args.filterCancerHotspot:
+                # Flatten list of fields
+                fdb = []
+                for db in args.cancerDb.split(','):
+                    for x in dbFlags['cancerDb'][db]:
+                        fdb.append(x)
+
+                if isCancerHotspot(variant, infos=dbInfo, flags=fdb):
+                    debugInfo = ",".join([debugInfo, "HOTSPOT"])
+                    if not args.debug:
+                        continue
+
+            # Polymorphisms
+            if args.filterPolym:
+                # Flatten list of fields
+                fdb = []
+                for db in args.polymDb.split(','):
+                    for x in dbFlags['polymDb'][db]:
+                        fdb.append(x)
+
+                if isPolym(variant, infos=dbInfo, flags=fdb, val=args.minMAF):
+                    debugInfo = ",".join([debugInfo, "POLYM"])
+                    if not args.debug:
+                        continue
+
+            # Recurrence
+            if args.filterRecurrence:
+                if isPolym(variant, infos=dbInfo, flags=dbFlags['recurrence']['run'], val=0):
+                    debugInfo = ",".join([debugInfo, "RUNREC"])
+                    if not args.debug:
+                        continue
+
+        except:
+            warnflag = str(variant.CHROM) + ":" + str(variant.start) + "-" + str(variant.end)
+            warnings.warn("Warning : variant ", warnflag, " raises an error. Skipped so far ...")
+            raise
+
+        # Still alive
+        if debugInfo == "":
+            varTMB += 1
+            if args.export:
+                wx.write_record(variant)
+
+        if args.debug:
+            variant.INFO["TMB_FILTERS"] = re.sub(r'^,', '', debugInfo)
+            wd.write_record(variant)
+
+    if args.export:
+        wx.close()
+    if args.debug:
+        wd.close()
+    vcf.close()
+
+    # Calculate TMB
+    TMB = round(float(varTMB)/(float(effGS)/1e6), 2)
+
+    # Output
+    print("pyTMB version=", __version__)
+    print("When=", date.today())
+    print("")
+    print("Config caller=", args.varConfig)
+    print("Config databases=", args.dbConfig)
+    print("")
+    print("Filters:")
+    print("-------")
+    print("minVAF=", args.minVAF)
+    print("minMAF=", args.minMAF)
+    print("minDepth=", args.minDepth)
+    print("minAltDepth=", args.minAltDepth)
+    print("filterLowQual=", args.filterLowQual)
+    print("filterIndels=", args.filterIndels)
+    print("filterCoding=", args.filterCoding)
+    print("filterNonCoding=", args.filterNonCoding)
+    print("filterSplice=", args.filterSplice)
+    print("filterSyn=", args.filterSyn)
+    print("filterNonSyn=", args.filterNonSyn)
+    print("filterConcerHostpot=", args.filterCancerHotspot)
+    print("filterPolym=", args.filterPolym)
+    print("")
+    print("Total number of variants=", varCounter)
+    print("Non-informative variants=", varNI)
+    print("Variants after filters=", varTMB)
+    print("Effective Genome Size=", effGS)
+    print("")
+    print("TMB=", TMB)

From 2f90bc8a54f59ac7197d8e291344d1dd0ee795c0 Mon Sep 17 00:00:00 2001
From: nservant <nicolas.servant@curie.fr>
Date: Thu, 14 May 2020 09:55:57 +0200
Subject: [PATCH 08/15] [MODIF] first test for multiallelic case

---
 README.md         | 32 ++++++++++++++----
 bin/pyTMB_test.py | 86 ++++++++++++++++++++++-------------------------
 2 files changed, 67 insertions(+), 51 deletions(-)

diff --git a/README.md b/README.md
index 48b72a3..3fb9ba3 100644
--- a/README.md
+++ b/README.md
@@ -182,14 +182,17 @@ This option allows to export a vcf file which only contains the variants used fo
 The option allows to export a vcf file with the tag **TMB_FILTERS** in the **INFO** field. This tag therefore contains the reason for which a variant would be filtered.
 
 
-## Examples
+## Usage and recommandations
 
-Let's calculated the TMB on a gene panel vcf file (coding size = 1.9Mb, caller = varscan, annotation = Annovar) with the following criteria: 
-- PASS
+Here is a list of recommanded parameters for different user cases.
+
+### Gene Panel
+
+Let's calculated the TMB on a gene panel vcf file (coding size = 1.59Mb, caller = varscan, annotation = Annovar) with the following criteria: 
 - minDepth at 100X
 - non-synonymous
-- coding
-- non polymorphism variants using 1K, gnomAD databases and a minMAF of 0.001
+- coding and splice
+- non polymorphism variants using 1K, gnomAD databases and a MAF of 0.001
 
 In this case, a typical usage would be :
 
@@ -202,11 +205,28 @@ python pyTMB.py -i ${VCF} \
 --filterNonCoding \
 --filterSplice \
 --filterSyn \
---filterPolym  --minMAF 0.001 --polymDb 1k,gnomad \
+--filterPolym --minMAF 0.001 --polymDb 1k,gnomad \
 --effGenomeSize 1590000 \
 --export > TMB_results.log
 ```
 
+### Exome / Whole Genome Sequencing
+
+For larger panel, we recommand the following parameters ;
+
+```
+python pyTMB.py -i ${VCF} \
+--dbConfig conf/snpeff.yml \
+--varConfig conf/mutect2.yml \
+--minVAF 0.05 \
+--filterLowQual \
+--minDepth 100 \
+--filterNonCoding \
+--filterIndel \
+--filterSyn \
+--filterPolym --minMAF 0.001 --polymDb 1k,gnomad
+```
+
 ### Credits
 
 This pipeline has been written by the bioinformatics core facility in close collaboration with the Clinical Bioinformatics and the Genetics Service of the Institut Curie.
diff --git a/bin/pyTMB_test.py b/bin/pyTMB_test.py
index 9ffd268..6e5e0b1 100644
--- a/bin/pyTMB_test.py
+++ b/bin/pyTMB_test.py
@@ -14,7 +14,7 @@
 #
 ##############################################################################
 
-__version__ = '1.2.0'
+__version__ = '1.2.1'
 
 """
 This script is designed to calculate a TMB score from a VCF file.
@@ -28,7 +28,7 @@
 --filterNonCoding \
 --filterSplice \
 --filterSyn \
---filterPolym  --minMAF 0.001 --polymDb 1k,gnomad \
+--filterPolym  --maf 0.001 --polymDb 1k,gnomad \
 --effGenomeSize 1590000 > TMB_results.log
 """
 
@@ -46,6 +46,7 @@
 Load yaml file
 """
 def loadConfig(infile):
+
     with open(infile, 'r') as stream:
         try:
             return(yaml.safe_load(stream))
@@ -54,6 +55,7 @@ def loadConfig(infile):
 
 
 def getMultiAlleleHeader(vcf):
+
     FORMAT=[]
     INFO=[]
     for h in vcf.header_iter():
@@ -116,14 +118,16 @@ def isAnnotatedAs(v, infos, flags, sep):
 Check if a variant is in a genomeDb with a MAF > val
 """
 def isPolym(v, infos, flags, val):
+
     subINFO = subsetINFO(infos, keys=flags)
     for key in subINFO:
         if type(subINFO[key]) is tuple:
             for i in subINFO[key]:
-                if i is not None and float(i) >= float(val):
-                    return True    
+                if i is not None and i != ".": 
+                    if float(i) >= float(val):
+                        return True
         elif subINFO[key] is not None and subINFO[key] != ".":
-            if subINFO[key] is not None and float(sk[i]) >= float(val):
+            if float(subINFO[key]) >= float(val):
                 return True
     return(False)
 
@@ -132,6 +136,7 @@ def isPolym(v, infos, flags, val):
 Check if a variant is annotated as a cancer hotspot
 """
 def isCancerHotspot(v, infos, flags):
+
     subINFO = subsetINFO(infos, keys=flags)
     for key in subINFO:
         if subINFO[key] is not None and subINFO[key] != ".":
@@ -152,7 +157,6 @@ def subsetINFO(annot, keys):
                 subsetInfo.append(z)
     else:
         subsetInfo = dict((k, annot[k]) for k in keys if k in annot)
-
     return(subsetInfo)
 
 
@@ -211,58 +215,46 @@ def getTag(v, tag):
 Parse inputs
 """
 def argsParse():
+
     parser = argparse.ArgumentParser()
     parser.add_argument("-i", "--vcf", help="Input file (.vcf, .vcf.gz, .bcf)")
 
     # Configs
-    parser.add_argument("--dbConfig", help="Databases config file",
-                        default="./config/databases.yml")
-    parser.add_argument("--varConfig", help="Variant calling config file",
-                        default="./config/calling.yml")
+    parser.add_argument("--dbConfig", help="Databases config file", type=str, default="./config/databases.yml")
+    parser.add_argument("--varConfig", help="Variant calling config file", type=str, default="./config/calling.yml")
+    parser.add_argument("--sample", help="Specify the sample ID to focus on", type=str, default=None)
 
     # Efective genome size
     parser.add_argument("--effGenomeSize", help="Effective genome size", type=int, default=None)
-    parser.add_argument(
-        "--bed", help="Capture design to use if effGenomeSize is not defined (BED file)", default=None)
+    parser.add_argument("--bed", help="Capture design to use if effGenomeSize is not defined (BED file)", default=None)
 
     # Thresholds
-    parser.add_argument(
-        "--minVAF", help="Filter variants with Allelic Ratio < minVAF", type=float, default=0.05)
-    parser.add_argument("--minMAF", help="Filter variants with MAF < minMAF",
-                        type=float, default=0.001)
-    parser.add_argument(
-        "--minDepth", help="Filter variants with depth < minDepth", type=int, default=5)
-    parser.add_argument(
-        "--minAltDepth", help="Filter variants with alternative allele depth < minAltDepth", type=int, default=3)
+    parser.add_argument("--vaf", help="Filter variants with Allelic Ratio < vaf", type=float, default=0.05)
+    parser.add_argument("--maf", help="Filter variants with MAF > maf", type=float, default=0.001)
+    parser.add_argument("--minDepth", help="Filter variants with depth < minDepth", type=int, default=5)
+    parser.add_argument("--minAltDepth", help="Filter variants with alternative allele depth < minAltDepth", type=int, default=3)
 
     # Which variants to use
-    parser.add_argument("--filterLowQual",
-                        help="Filter low quality (i.e not PASS) variant", action="store_true")
+    parser.add_argument("--filterLowQual", help="Filter low quality (i.e not PASS) variant", action="store_true")
     parser.add_argument("--filterIndels", help="Filter insertions/deletions", action="store_true")
     parser.add_argument("--filterCoding", help="Filter Coding variants", action="store_true")
     parser.add_argument("--filterSplice", help="Filter Splice variants", action="store_true")
     parser.add_argument("--filterNonCoding", help="Filter Non-coding variants", action="store_true")
     parser.add_argument("--filterSyn", help="Filter Synonymous variants", action="store_true")
-    parser.add_argument("--filterNonSyn", help="Filter Non-Synonymous variants",
-                        action="store_true")
-    parser.add_argument("--filterCancerHotspot",
-                        help="Filter variants annotated as cancer hotspots", action="store_true")
-    parser.add_argument(
-        "--filterPolym", help="Filter polymorphism variants in genome databases. See --minMAF", action="store_true")
-    parser.add_argument("--filterRecurrence",
-                        help="Filter on recurrence values", action="store_true")
+    parser.add_argument("--filterNonSyn", help="Filter Non-Synonymous variants",action="store_true")
+    parser.add_argument("--filterCancerHotspot",help="Filter variants annotated as cancer hotspots", action="store_true")
+    parser.add_argument("--filterPolym", help="Filter polymorphism variants in genome databases. See --maf", action="store_true")
+    parser.add_argument("--filterRecurrence", help="Filter on recurrence values", action="store_true")
 
     # Databases
-    parser.add_argument(
-        "--polymDb", help="Databases used for polymorphisms detection (comma separated)", default="gnomad")
-    parser.add_argument(
-        "--cancerDb", help="Databases used for cancer hotspot annotation (comma separated)", default="cosmic")
+    parser.add_argument("--polymDb", help="Databases used for polymorphisms detection (comma separated)", default="gnomad")
+    parser.add_argument("--cancerDb", help="Databases used for cancer hotspot annotation (comma separated)", default="cosmic")
 
     # Others
-    parser.add_argument("--verbose", action="store_true")
-    parser.add_argument("--debug", action="store_true")
-    parser.add_argument("--export", help="", action="store_true")
-    parser.add_argument("--version", action='version', version="%(prog)s ("+__version__+")")
+    parser.add_argument("--verbose", help="Active verbose mode", action="store_true")
+    parser.add_argument("--debug", help="Export original VCF with TMB_FILTER tag", action="store_true")
+    parser.add_argument("--export", help="Export a VCF with the considered variants", action="store_true")
+    parser.add_argument("--version", help="Version number", action='version', version="%(prog)s ("+__version__+")")
 
     args = parser.parse_args()
     return (args)
@@ -273,7 +265,11 @@ def argsParse():
     args = argsParse()
 
     # Loading Data
-    vcf = cyvcf2.VCF(args.vcf)
+    if args.sample is not None:
+        vcf = cyvcf2.VCF(args.vcf, samples=args.sample)
+    else:
+        vcf = cyvcf2.VCF(args.vcf)
+
     #maHeader = getMultiAlleleHeader(vcf)
     
     if len(vcf.samples) > 1:
@@ -314,8 +310,8 @@ def argsParse():
                 sys.exit()
 
         try:
-            if len(variant.ALT) == 1:
-                continue
+            #if len(variant.ALT) == 1:
+            #    continue
 
             # All vcf INFO
             dbInfo = dict(variant.INFO)
@@ -350,7 +346,7 @@ def argsParse():
 
             # Variant Allele Frequency
             fval = getTag(variant, callerFlags['freq'])
-            if fval is not None and len(fval[fval < args.minVAF]) == len(variant.ALT):
+            if fval is not None and len(fval[fval < args.vaf]) == len(variant.ALT):
                 debugInfo = ",".join([debugInfo, "VAF"])
                 if not args.debug:
                     continue
@@ -424,7 +420,7 @@ def argsParse():
                     for x in dbFlags['polymDb'][db]:
                         fdb.append(x)
 
-                if isPolym(variant, infos=dbInfo, flags=fdb, val=args.minMAF):
+                if isPolym(variant, infos=dbInfo, flags=fdb, val=args.maf):
                     debugInfo = ",".join([debugInfo, "POLYM"])
                     if not args.debug:
                         continue
@@ -469,8 +465,8 @@ def argsParse():
     print("")
     print("Filters:")
     print("-------")
-    print("minVAF=", args.minVAF)
-    print("minMAF=", args.minMAF)
+    print("VAF=", args.vaf)
+    print("MAF=", args.maf)
     print("minDepth=", args.minDepth)
     print("minAltDepth=", args.minAltDepth)
     print("filterLowQual=", args.filterLowQual)

From ede382e35b9679c6fafd7159d47f342092ffc9c3 Mon Sep 17 00:00:00 2001
From: nservant <nicolas.servant@curie.fr>
Date: Thu, 14 May 2020 13:44:48 +0200
Subject: [PATCH 09/15] [MODIF] first test for multiallelic case

---
 bin/pyTMB_test.py   | 2 ++
 config/varscan2.yml | 1 +
 2 files changed, 3 insertions(+)

diff --git a/bin/pyTMB_test.py b/bin/pyTMB_test.py
index 6e5e0b1..dc069fa 100644
--- a/bin/pyTMB_test.py
+++ b/bin/pyTMB_test.py
@@ -207,6 +207,8 @@ def getTag(v, tag):
 
     if type(val) != np.ndarray:
         val = np.array([val], float)
+    else:
+        val = val.astype('float')
 
     return(val)
 
diff --git a/config/varscan2.yml b/config/varscan2.yml
index 5216229..148d32b 100644
--- a/config/varscan2.yml
+++ b/config/varscan2.yml
@@ -6,4 +6,5 @@
 
 freq: 'ALLELIC_RATIO'
 depth: 'DP'
+altDepth: 'AD'
 

From b5da36b1d1630bc79d7f5694318412140e2e5657 Mon Sep 17 00:00:00 2001
From: Gutman Tom <tgutman@shdli007.curie.fr>
Date: Thu, 14 May 2020 23:20:52 +0200
Subject: [PATCH 10/15] syntax modif + recommended params

---
 bin/pyTMB_test.py | 108 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 75 insertions(+), 33 deletions(-)

diff --git a/bin/pyTMB_test.py b/bin/pyTMB_test.py
index 6e5e0b1..f4a6793 100644
--- a/bin/pyTMB_test.py
+++ b/bin/pyTMB_test.py
@@ -23,13 +23,15 @@
 Let's defined a TMB as a score using PASS, non-synonymous, coding, non polymorphism variants ...
 In this case, a typical usage would be :
 
-python pyTMB.py -i ${VCF} --minDepth 100 \
+python pyTMB.py -i ${VCF} --effGenomeSize 33280000 \
+--vaf 0.05 --maf 0.001 --minDepth 20 --minAltDepth 3\
 --filterLowQual \
 --filterNonCoding \
---filterSplice \
 --filterSyn \
---filterPolym  --maf 0.001 --polymDb 1k,gnomad \
---effGenomeSize 1590000 > TMB_results.log
+--filterPolym \
+--polymDb 1k,gnomad \
+--dbConfig ${DB_CONFIG} \
+--varConfig ${VAR_CONFIG} > TMB_results.log
 """
 
 import cyvcf2
@@ -45,6 +47,8 @@
 """
 Load yaml file
 """
+
+
 def loadConfig(infile):
 
     with open(infile, 'r') as stream:
@@ -56,8 +60,8 @@ def loadConfig(infile):
 
 def getMultiAlleleHeader(vcf):
 
-    FORMAT=[]
-    INFO=[]
+    FORMAT = []
+    INFO = []
     for h in vcf.header_iter():
         i = h.info(extra=True)
         if 'Number' in i.keys() and i['Number'] == 'A':
@@ -67,9 +71,12 @@ def getMultiAlleleHeader(vcf):
                 INFO.append(i['ID'])
     return(dict(FORMAT=FORMAT, INFO=INFO))
 
+
 """
 Calculate Effective Genome Size from a BED file
 """
+
+
 def getEffGenomeSizeFromBed(infile, verbose=False):
 
     if verbose:
@@ -97,6 +104,8 @@ def getEffGenomeSizeFromBed(infile, verbose=False):
 """
 Check if a variant has the provided annotation flags
 """
+
+
 def isAnnotatedAs(v, infos, flags, sep):
 
     # Subset annotation information as list
@@ -117,13 +126,15 @@ def isAnnotatedAs(v, infos, flags, sep):
 """
 Check if a variant is in a genomeDb with a MAF > val
 """
+
+
 def isPolym(v, infos, flags, val):
 
     subINFO = subsetINFO(infos, keys=flags)
     for key in subINFO:
         if type(subINFO[key]) is tuple:
             for i in subINFO[key]:
-                if i is not None and i != ".": 
+                if i is not None and i != ".":
                     if float(i) >= float(val):
                         return True
         elif subINFO[key] is not None and subINFO[key] != ".":
@@ -135,6 +146,8 @@ def isPolym(v, infos, flags, val):
 """
 Check if a variant is annotated as a cancer hotspot
 """
+
+
 def isCancerHotspot(v, infos, flags):
 
     subINFO = subsetINFO(infos, keys=flags)
@@ -147,6 +160,8 @@ def isCancerHotspot(v, infos, flags):
 """
 Subset the annotation information to a few key values
 """
+
+
 def subsetINFO(annot, keys):
 
     if isinstance(annot, list):
@@ -164,6 +179,8 @@ def subsetINFO(annot, keys):
 Format the INFO field from snpEff and return a list of dict
 ie. snpEff
 """
+
+
 def infoTag2dl(INFO):
 
     if INFO is not None:
@@ -180,6 +197,8 @@ def infoTag2dl(INFO):
 Format the INFO field from ANNOVAR and return a list of dict
 ie. annovar
 """
+
+
 def info2dl(INFO):
 
     if INFO is not None:
@@ -190,19 +209,21 @@ def info2dl(INFO):
 Get a tag value from either the format field or the info field
 Return a 2D numpy array
 """
+
+
 def getTag(v, tag):
 
     # First check in FORMAT field
     if tag in variant.FORMAT:
         val = variant.format(tag)
-        #if np.shape(val) == (1, 1) and val < 0:
+        # if np.shape(val) == (1, 1) and val < 0:
         #        val = None
 
     # Otherwise, check in INFO field
     if tag not in variant.FORMAT or val is None:
         val = variant.INFO.get(tag)
 
-    #if np.shape(val) == (1, 1) and val is not None:
+    # if np.shape(val) == (1, 1) and val is not None:
     #    val = float(val)
 
     if type(val) != np.ndarray:
@@ -214,47 +235,66 @@ def getTag(v, tag):
 """
 Parse inputs
 """
+
+
 def argsParse():
 
     parser = argparse.ArgumentParser()
     parser.add_argument("-i", "--vcf", help="Input file (.vcf, .vcf.gz, .bcf)")
 
     # Configs
-    parser.add_argument("--dbConfig", help="Databases config file", type=str, default="./config/databases.yml")
-    parser.add_argument("--varConfig", help="Variant calling config file", type=str, default="./config/calling.yml")
-    parser.add_argument("--sample", help="Specify the sample ID to focus on", type=str, default=None)
+    parser.add_argument("--dbConfig", help="Databases config file",
+                        type=str, default="./config/databases.yml")
+    parser.add_argument("--varConfig", help="Variant calling config file",
+                        type=str, default="./config/calling.yml")
+    parser.add_argument("--sample", help="Specify the sample ID to focus on",
+                        type=str, default=None)
 
     # Efective genome size
     parser.add_argument("--effGenomeSize", help="Effective genome size", type=int, default=None)
-    parser.add_argument("--bed", help="Capture design to use if effGenomeSize is not defined (BED file)", default=None)
+    parser.add_argument(
+        "--bed", help="Capture design to use if effGenomeSize is not defined (BED file)", default=None)
 
     # Thresholds
-    parser.add_argument("--vaf", help="Filter variants with Allelic Ratio < vaf", type=float, default=0.05)
+    parser.add_argument("--vaf", help="Filter variants with Allelic Ratio < vaf",
+                        type=float, default=0.05)
     parser.add_argument("--maf", help="Filter variants with MAF > maf", type=float, default=0.001)
-    parser.add_argument("--minDepth", help="Filter variants with depth < minDepth", type=int, default=5)
-    parser.add_argument("--minAltDepth", help="Filter variants with alternative allele depth < minAltDepth", type=int, default=3)
+    parser.add_argument(
+        "--minDepth", help="Filter variants with depth < minDepth", type=int, default=5)
+    parser.add_argument(
+        "--minAltDepth", help="Filter variants with alternative allele depth < minAltDepth", type=int, default=3)
 
     # Which variants to use
-    parser.add_argument("--filterLowQual", help="Filter low quality (i.e not PASS) variant", action="store_true")
+    parser.add_argument("--filterLowQual",
+                        help="Filter low quality (i.e not PASS) variant", action="store_true")
     parser.add_argument("--filterIndels", help="Filter insertions/deletions", action="store_true")
     parser.add_argument("--filterCoding", help="Filter Coding variants", action="store_true")
     parser.add_argument("--filterSplice", help="Filter Splice variants", action="store_true")
     parser.add_argument("--filterNonCoding", help="Filter Non-coding variants", action="store_true")
     parser.add_argument("--filterSyn", help="Filter Synonymous variants", action="store_true")
-    parser.add_argument("--filterNonSyn", help="Filter Non-Synonymous variants",action="store_true")
-    parser.add_argument("--filterCancerHotspot",help="Filter variants annotated as cancer hotspots", action="store_true")
-    parser.add_argument("--filterPolym", help="Filter polymorphism variants in genome databases. See --maf", action="store_true")
-    parser.add_argument("--filterRecurrence", help="Filter on recurrence values", action="store_true")
+    parser.add_argument("--filterNonSyn", help="Filter Non-Synonymous variants",
+                        action="store_true")
+    parser.add_argument("--filterCancerHotspot",
+                        help="Filter variants annotated as cancer hotspots", action="store_true")
+    parser.add_argument(
+        "--filterPolym", help="Filter polymorphism variants in genome databases. See --maf", action="store_true")
+    parser.add_argument("--filterRecurrence",
+                        help="Filter on recurrence values", action="store_true")
 
     # Databases
-    parser.add_argument("--polymDb", help="Databases used for polymorphisms detection (comma separated)", default="gnomad")
-    parser.add_argument("--cancerDb", help="Databases used for cancer hotspot annotation (comma separated)", default="cosmic")
+    parser.add_argument(
+        "--polymDb", help="Databases used for polymorphisms detection (comma separated)", default="gnomad")
+    parser.add_argument(
+        "--cancerDb", help="Databases used for cancer hotspot annotation (comma separated)", default="cosmic")
 
     # Others
     parser.add_argument("--verbose", help="Active verbose mode", action="store_true")
-    parser.add_argument("--debug", help="Export original VCF with TMB_FILTER tag", action="store_true")
-    parser.add_argument("--export", help="Export a VCF with the considered variants", action="store_true")
-    parser.add_argument("--version", help="Version number", action='version', version="%(prog)s ("+__version__+")")
+    parser.add_argument(
+        "--debug", help="Export original VCF with TMB_FILTER tag", action="store_true")
+    parser.add_argument(
+        "--export", help="Export a VCF with the considered variants", action="store_true")
+    parser.add_argument("--version", help="Version number", action='version',
+                        version="%(prog)s ("+__version__+")")
 
     args = parser.parse_args()
     return (args)
@@ -270,10 +310,11 @@ def argsParse():
     else:
         vcf = cyvcf2.VCF(args.vcf)
 
-    #maHeader = getMultiAlleleHeader(vcf)
-    
+    # maHeader = getMultiAlleleHeader(vcf)
+
     if len(vcf.samples) > 1:
-        sys.stderr.write("Error: " + len(vcf.samples) + " sample detected. This version is designed for a single sample !")
+        sys.stderr.write("Error: " + len(vcf.samples) +
+                         " sample detected. This version is designed for a single sample !")
         sys.exit(-1)
 
     if args.export:
@@ -297,7 +338,8 @@ def argsParse():
         if args.bed is not None:
             effGS = getEffGenomeSizeFromBed(args.bed)
         else:
-            sys.stderr.write("Error: Effective Genome Size not specified. See --effGenomeSize or --bed")
+            sys.stderr.write(
+                "Error: Effective Genome Size not specified. See --effGenomeSize or --bed")
             sys.exit(-1)
     else:
         effGS = args.effGenomeSize
@@ -310,7 +352,7 @@ def argsParse():
                 sys.exit()
 
         try:
-            #if len(variant.ALT) == 1:
+            # if len(variant.ALT) == 1:
             #    continue
 
             # All vcf INFO
@@ -341,7 +383,7 @@ def argsParse():
                     continue
 
             #######################
-            ## FORMAT
+            # FORMAT
             #######################
 
             # Variant Allele Frequency
@@ -366,7 +408,7 @@ def argsParse():
                     continue
 
             ######################
-            ## Annotation INFO
+            # Annotation INFO
             ######################
 
             # Coding variants

From b459c80d8f6a698fc9f6335591c8b5a3fce0598a Mon Sep 17 00:00:00 2001
From: Gutman Tom <tgutman@shdli007.curie.fr>
Date: Thu, 14 May 2020 23:24:45 +0200
Subject: [PATCH 11/15] old modif not relevant anymore

---
 bin/pyTMB.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/bin/pyTMB.py b/bin/pyTMB.py
index 990ee92..5c9a36f 100644
--- a/bin/pyTMB.py
+++ b/bin/pyTMB.py
@@ -343,11 +343,7 @@ def argsParse():
 
             # Variant Allele Frequency
             fval = getTag(variant, callerFlags['freq'])
-            if type(fval) == np.ndarray:
-                debugInfo = ",".join([debugInfo, "MULTI_VAF"])
-                if not args.debug:
-                    continue
-            if fval is not None and fval < args.minVAF:
+            if type(fval) == float and fval is not None and fval < args.minVAF:
                 debugInfo = ",".join([debugInfo, "VAF"])
                 if not args.debug:
                     continue

From 4f031b725652e2383a6abb59ea812556ecdf602e Mon Sep 17 00:00:00 2001
From: Gutman Tom <tgutman@shdli007.curie.fr>
Date: Thu, 14 May 2020 23:25:23 +0200
Subject: [PATCH 12/15] modif vaf maf + new features + recommended params

---
 CHANGELOG |  7 +++----
 README.md | 43 ++++++++++++++++++++++---------------------
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 1e6df11..8254306 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -4,6 +4,9 @@ CHANGES IN VERSION 1.2.0
 SIGNIFICANT USER CHANGES
 
    o Add parameter `--minAltDepth`
+   o Rework filters to handle multiallelic Variants
+   o Swith filter name from --minVAF & --minMAF to --vaf & --maf
+   o test if the vcf contains only one sample 
 
 *************************************
 CHANGES IN VERSION 1.1.0
@@ -24,7 +27,3 @@ RELEASE VERSION 1.0.0
    o Tested on the DRAGON data
    o Support for Annovar and snpEff databases
    o Support for Varscan2 and Mutect2
-
-
-
-
diff --git a/README.md b/README.md
index 3fb9ba3..4b66426 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@
 
 This tool was designed to calculate a **Tumor Mutational Burden (TMB)** score from a VCF file.
 
-The TMB is usually defined as the total number of non-synonymous mutations per coding area of a tumor genome. This metric is mainly used as a biomarker in clinical practice to determine whether to use or not immunomodulatory anticancer drugs (immune checkpoint inhibitors such as Nivolumab). Whole Exome Sequencing (WES) allows comprehensive measurement of TMB and is considered the gold standard. In practice, as TMB is mainly used in the routine of the clinic, and due to the high cost of WES, TMB calculation based on gene panels is preferred. 
+The TMB is usually defined as the total number of non-synonymous mutations per coding area of a tumor genome. This metric is mainly used as a biomarker in clinical practice to determine whether to use or not immunomodulatory anticancer drugs (immune checkpoint inhibitors such as Nivolumab). Whole Exome Sequencing (WES) allows comprehensive measurement of TMB and is considered the gold standard. In practice, as TMB is mainly used in the routine of the clinic, and due to the high cost of WES, TMB calculation based on gene panels is preferred.
 
 Currently, the main limitation of TMB calculation is the lack of standard for its calculation. Therefore, we decided to propose a very **versatile** tool allowing the user to define exactly which type of variants to use or filter.
 
@@ -34,9 +34,9 @@ The TMB is defined as the number of variants over the size of the genomic region
 
 ```bash
 python bin/pyTMB.py --help
-usage: pyTMB.py [-h] [-i VCF] [--dbConfig DBCONFIG] [--varConfig VARCONFIG] 
+usage: pyTMB.py [-h] [-i VCF] [--dbConfig DBCONFIG] [--varConfig VARCONFIG]
                 [--effGenomeSize EFFGENOMESIZE] [--bed BED]
-                [--minVAF MINVAF] [--minMAF MINMAF] [--minDepth MINDEPTH] [--minAltDepth MINALTDEPTH]
+                [--vaf MINVAF] [--maf MINMAF] [--minDepth MINDEPTH] [--minAltDepth MINALTDEPTH]
                 [--filterLowQual] [--filterIndels] [--filterCoding]
                 [--filterSplice] [--filterNonCoding] [--filterSyn]
                 [--filterNonSyn] [--filterCancerHotspot] [--filterPolym]
@@ -66,15 +66,15 @@ optional arguments:
   --filterCancerHotspot               Filter variants annotated as cancer hotspots
   --filterPolym                       Filter polymorphism variants in genome databases. See --minMAF
   --filterRecurrence                  Filter on recurrence values
-  
+
   --polymDb POLYMDB                   Databases used for polymorphisms detection (comma separated)
   --cancerDb CANCERDB                 Databases used for cancer hotspot annotation (comma separated)
-  
+
   --verbose
   --debug
   --export
   --version
-		  
+
 ```
 
 ## Configs
@@ -82,7 +82,7 @@ optional arguments:
 Working with vcf files is usually not straighforward, and mainly depends on the variant caller and annotation tools/databases used.
 In order to make this tool as flexible as possible, we decided to set up **two configurations files** to defined with fields as to be checked and in which case.
 
-The `--dbConfig` file described all details about annotation. As an exemple, we provide some configurations for **Annovar** (*conf/annovar.yml*) 
+The `--dbConfig` file described all details about annotation. As an exemple, we provide some configurations for **Annovar** (*conf/annovar.yml*)
 and **snpEff** (*conf/snpeff.yaml*) tool.  
 These files can be customized by the user.
 
@@ -121,17 +121,17 @@ The same is true for the `--cancerDb` parameter.
 
 ### Filters
 
-#### `--minVAF MINVAF`
+#### `--vaf MINVAF`
 Filter variants with Allelic Ratio < minVAF. Note the field used to get the Allelic Ratio field is defined in the *conf/caller.yml* file.
 In this case, the programm will first look for this information in the **FORMAT** field, and then in the **INFO** field.
 
-#### `--minMAF MINMAF`
-Filter variants with MAF < minMAF. Note the databases used to check the Min Allele Frequency are set using the `--polymDb` 
+#### `--maf MINMAF`
+Filter variants with MAF < minMAF. Note the databases used to check the Min Allele Frequency are set using the `--polymDb`
 parameters and the *conf/databases.yml* file.
 
 #### `--minDepth MINDEPTH`
-Filter variants with depth < minDepth. Note the field used to get the depth is defined in the *conf/caller.yml* file. 
-In this case, the programm will first look for this information in the **FORMAT** field, and then in the **INFO** field. 
+Filter variants with depth < minDepth. Note the field used to get the depth is defined in the *conf/caller.yml* file.
+In this case, the programm will first look for this information in the **FORMAT** field, and then in the **INFO** field.
 
 #### `--minAltDepth MINALTDEPTH`
 FIlter alternative allele with depth < minAltDepth. The programm will look in the **FORMAT** field exclusively.
@@ -166,7 +166,7 @@ Filter polymorphism variants from genome databases. The databases to considered
 The fields to scan for each database are defined in the *conf/databases.yml* file and the population frequency is compared with the `--minMAF` field.
 
 #### `--filterRecurrence`
-Filter on recurrence values (for instance, intra-run occurence). In this case, the vcf file must contains the recurrence information 
+Filter on recurrence values (for instance, intra-run occurence). In this case, the vcf file must contains the recurrence information
 which can be defined the *conf/databases.yml* file.
 
 ## Outputs
@@ -188,7 +188,7 @@ Here is a list of recommanded parameters for different user cases.
 
 ### Gene Panel
 
-Let's calculated the TMB on a gene panel vcf file (coding size = 1.59Mb, caller = varscan, annotation = Annovar) with the following criteria: 
+Let's calculated the TMB on a gene panel vcf file (coding size = 1.59Mb, caller = varscan, annotation = Annovar) with the following criteria:
 - minDepth at 100X
 - non-synonymous
 - coding and splice
@@ -212,19 +212,21 @@ python pyTMB.py -i ${VCF} \
 
 ### Exome / Whole Genome Sequencing
 
-For larger panel, we recommand the following parameters ;
+For WES, we recommend filtering low quality, non coding, synonymous, polymorphic variants. Here, indels and splicing variants are kept. For WES, an effective Genome size of 33Mb is used but a tailored size depending on the variants and regions is preferred.
+
+
+In the case of a WES variant calling using Mutect2 as variant caller and Snpeff as annotation tool, a typical usage would be :
 
 ```
-python pyTMB.py -i ${VCF} \
+python pyTMB.py -i ${VCF} --effGenomeSize 33280000 \
 --dbConfig conf/snpeff.yml \
 --varConfig conf/mutect2.yml \
---minVAF 0.05 \
+--vaf 0.05 --maf 0.001 --minDepth 20 --minAltDepth 3\
 --filterLowQual \
---minDepth 100 \
 --filterNonCoding \
---filterIndel \
 --filterSyn \
---filterPolym --minMAF 0.001 --polymDb 1k,gnomad
+--filterPolym \
+--polymDb 1k,gnomad  > TMB_results.log
 ```
 
 ### Credits
@@ -234,4 +236,3 @@ This pipeline has been written by the bioinformatics core facility in close coll
 ### Contacts
 
 For any question, bug or suggestion, please use the issues system or contact the bioinformatics core facility.
-

From f6f186ac20a5528672ed5532d02aa8353e0ffdb5 Mon Sep 17 00:00:00 2001
From: nservant <nicolas.servant@curie.fr>
Date: Fri, 15 May 2020 09:16:06 +0200
Subject: [PATCH 13/15] [BUMP] version 1.2.0

---
 bin/pyTMB.py          | 225 +++++++++++-------
 bin/pyTMB_test.py     | 531 ------------------------------------------
 test-op/dragon/run.sh |  37 +--
 3 files changed, 160 insertions(+), 633 deletions(-)
 delete mode 100644 bin/pyTMB_test.py

diff --git a/bin/pyTMB.py b/bin/pyTMB.py
index 5c9a36f..11a7ad1 100644
--- a/bin/pyTMB.py
+++ b/bin/pyTMB.py
@@ -23,23 +23,17 @@
 Let's defined a TMB as a score using PASS, non-synonymous, coding, non polymorphism variants ...
 In this case, a typical usage would be :
 
-python pyTMB.py -i ${VCF} --minDepth 100 \
+python pyTMB.py -i ${VCF} --effGenomeSize 33280000 \
+--vaf 0.05 --maf 0.001 --minDepth 20 --minAltDepth 3\
 --filterLowQual \
 --filterNonCoding \
---filterSplice \
 --filterSyn \
---filterPolym  --minMAF 0.001 --polymDb 1k,gnomad \
---effGenomeSize 1590000 > TMB_results.log
+--filterPolym \
+--polymDb 1k,gnomad \
+--dbConfig ${DB_CONFIG} \
+--varConfig ${VAR_CONFIG} > TMB_results.log
 """
 
-
-"""
-Load yaml file
-"""
-
-
-
-
 import cyvcf2
 import argparse
 import sys
@@ -49,7 +43,14 @@
 import numpy as np
 import os.path
 from datetime import date
+
+"""
+Load yaml file
+"""
+
+
 def loadConfig(infile):
+
     with open(infile, 'r') as stream:
         try:
             return(yaml.safe_load(stream))
@@ -57,6 +58,25 @@ def loadConfig(infile):
             raise
 
 
+"""
+Extract VCF column name with multi-Allelic information
+"""
+
+
+def getMultiAlleleHeader(vcf):
+
+    FORMAT = []
+    INFO = []
+    for h in vcf.header_iter():
+        i = h.info(extra=True)
+        if 'Number' in i.keys() and i['Number'] == 'A':
+            if i['HeaderType'] == 'FORMAT':
+                FORMAT.append(i['ID'])
+            elif i['HeaderType'] == 'INFO':
+                INFO.append(i['ID'])
+    return(dict(FORMAT=FORMAT, INFO=INFO))
+
+
 """
 Calculate Effective Genome Size from a BED file
 """
@@ -95,7 +115,6 @@ def isAnnotatedAs(v, infos, flags, sep):
 
     # Subset annotation information as list
     subINFO = subsetINFO(infos, keys=flags.keys())
-
     # Compare list variant information and expected list of tags
     for sub in subINFO:
         # For all keys
@@ -106,9 +125,6 @@ def isAnnotatedAs(v, infos, flags, sep):
                 for subval in sub[k].split(sep):
                     if val == subval:
                         return(True)
-                # else:
-                #    if re.search(val, sub[k]):
-                #        return(True)
     return(False)
 
 
@@ -121,7 +137,12 @@ def isPolym(v, infos, flags, val):
 
     subINFO = subsetINFO(infos, keys=flags)
     for key in subINFO:
-        if subINFO[key] is not None and subINFO[key] != ".":
+        if type(subINFO[key]) is tuple:
+            for i in subINFO[key]:
+                if i is not None and i != ".":
+                    if float(i) >= float(val):
+                        return True
+        elif subINFO[key] is not None and subINFO[key] != ".":
             if float(subINFO[key]) >= float(val):
                 return True
     return(False)
@@ -133,6 +154,7 @@ def isPolym(v, infos, flags, val):
 
 
 def isCancerHotspot(v, infos, flags):
+
     subINFO = subsetINFO(infos, keys=flags)
     for key in subINFO:
         if subINFO[key] is not None and subINFO[key] != ".":
@@ -155,7 +177,6 @@ def subsetINFO(annot, keys):
                 subsetInfo.append(z)
     else:
         subsetInfo = dict((k, annot[k]) for k in keys if k in annot)
-
     return(subsetInfo)
 
 
@@ -191,6 +212,7 @@ def info2dl(INFO):
 
 """
 Get a tag value from either the format field or the info field
+Return a 2D numpy array
 """
 
 
@@ -199,17 +221,20 @@ def getTag(v, tag):
     # First check in FORMAT field
     if tag in variant.FORMAT:
         val = variant.format(tag)
-        if np.shape(val) == (1, 1):
-            if val < 0:
-                val = None
+        # if np.shape(val) == (1, 1) and val < 0:
+        #        val = None
 
     # Otherwise, check in INFO field
     if tag not in variant.FORMAT or val is None:
         val = variant.INFO.get(tag)
 
-    if np.shape(val) == (1, 1):
-        if val is not None:
-            val = float(val)
+    # if np.shape(val) == (1, 1) and val is not None:
+    #    val = float(val)
+
+    if type(val) != np.ndarray:
+        val = np.array([val], float)
+    else:
+        val = val.astype('float')
 
     return(val)
 
@@ -220,58 +245,71 @@ def getTag(v, tag):
 
 
 def argsParse():
+
     parser = argparse.ArgumentParser()
     parser.add_argument("-i", "--vcf", help="Input file (.vcf, .vcf.gz, .bcf)")
 
     # Configs
-    parser.add_argument("--dbConfig", help="Databases config file",
-                        default="./config/databases.yml")
-    parser.add_argument("--varConfig", help="Variant calling config file",
-                        default="./config/calling.yml")
+    parser.add_argument("--dbConfig", 
+                        help="Databases config file", type=str, default="./config/databases.yml")
+    parser.add_argument("--varConfig", 
+                        help="Variant calling config file", type=str, default="./config/calling.yml")
+    parser.add_argument("--sample", 
+                        help="Specify the sample ID to focus on", type=str, default=None)
 
     # Efective genome size
-    parser.add_argument("--effGenomeSize", help="Effective genome size", type=int, default=None)
-    parser.add_argument(
-        "--bed", help="Capture design to use if effGenomeSize is not defined (BED file)", default=None)
+    parser.add_argument("--effGenomeSize", 
+                        help="Effective genome size", type=int, default=None)
+    parser.add_argument("--bed", 
+                        help="Capture design to use if effGenomeSize is not defined (BED file)", default=None)
 
     # Thresholds
-    parser.add_argument(
-        "--minVAF", help="Filter variants with Allelic Ratio < minVAF", type=float, default=0.05)
-    parser.add_argument("--minMAF", help="Filter variants with MAF < minMAF",
-                        type=float, default=0.001)
-    parser.add_argument(
-        "--minDepth", help="Filter variants with depth < minDepth", type=int, default=5)
-    parser.add_argument(
-        "--minAltDepth", help="Filter variants with alternative allele depth < minAltDepth", type=int, default=3)
+    parser.add_argument("--vaf", 
+                        help="Filter variants with Allelic Ratio < vaf", type=float, default=0.05)
+    parser.add_argument("--maf", 
+                        help="Filter variants with MAF > maf", type=float, default=0.001)
+    parser.add_argument("--minDepth", 
+                        help="Filter variants with depth < minDepth", type=int, default=5)
+    parser.add_argument("--minAltDepth", 
+                        help="Filter variants with alternative allele depth < minAltDepth", type=int, default=3)
 
     # Which variants to use
     parser.add_argument("--filterLowQual",
                         help="Filter low quality (i.e not PASS) variant", action="store_true")
-    parser.add_argument("--filterIndels", help="Filter insertions/deletions", action="store_true")
-    parser.add_argument("--filterCoding", help="Filter Coding variants", action="store_true")
-    parser.add_argument("--filterSplice", help="Filter Splice variants", action="store_true")
-    parser.add_argument("--filterNonCoding", help="Filter Non-coding variants", action="store_true")
-    parser.add_argument("--filterSyn", help="Filter Synonymous variants", action="store_true")
-    parser.add_argument("--filterNonSyn", help="Filter Non-Synonymous variants",
-                        action="store_true")
+    parser.add_argument("--filterIndels", 
+                        help="Filter insertions/deletions", action="store_true")
+    parser.add_argument("--filterCoding", 
+                        help="Filter Coding variants", action="store_true")
+    parser.add_argument("--filterSplice", 
+                        help="Filter Splice variants", action="store_true")
+    parser.add_argument("--filterNonCoding", 
+                        help="Filter Non-coding variants", action="store_true")
+    parser.add_argument("--filterSyn", 
+                        help="Filter Synonymous variants", action="store_true")
+    parser.add_argument("--filterNonSyn", 
+                        help="Filter Non-Synonymous variants", action="store_true")
     parser.add_argument("--filterCancerHotspot",
                         help="Filter variants annotated as cancer hotspots", action="store_true")
-    parser.add_argument(
-        "--filterPolym", help="Filter polymorphism variants in genome databases. See --minMAF", action="store_true")
+    parser.add_argument("--filterPolym", 
+                        help="Filter polymorphism variants in genome databases. See --maf", action="store_true")
     parser.add_argument("--filterRecurrence",
                         help="Filter on recurrence values", action="store_true")
 
     # Databases
-    parser.add_argument(
-        "--polymDb", help="Databases used for polymorphisms detection (comma separated)", default="gnomad")
-    parser.add_argument(
-        "--cancerDb", help="Databases used for cancer hotspot annotation (comma separated)", default="cosmic")
+    parser.add_argument("--polymDb", 
+                        help="Databases used for polymorphisms detection (comma separated)", default="gnomad")
+    parser.add_argument("--cancerDb", 
+                        help="Databases used for cancer hotspot annotation (comma separated)", default="cosmic")
 
     # Others
-    parser.add_argument("--verbose", action="store_true")
-    parser.add_argument("--debug", action="store_true")
-    parser.add_argument("--export", help="", action="store_true")
-    parser.add_argument("--version", action='version', version="%(prog)s ("+__version__+")")
+    parser.add_argument("--verbose", 
+                        help="Active verbose mode", action="store_true")
+    parser.add_argument("--debug", 
+                        help="Export original VCF with TMB_FILTER tag", action="store_true")
+    parser.add_argument("--export", 
+                        help="Export a VCF with the considered variants", action="store_true")
+    parser.add_argument("--version", 
+                        help="Version number", action='version', version="%(prog)s ("+__version__+")")
 
     args = parser.parse_args()
     return (args)
@@ -281,25 +319,33 @@ def argsParse():
 
     args = argsParse()
 
-    # Loading Data
-    vcf = cyvcf2.VCF(args.vcf)
+    # Load Data
+    if args.sample is not None:
+        vcf = cyvcf2.VCF(args.vcf, samples=args.sample)
+    else:
+        vcf = cyvcf2.VCF(args.vcf)
+
+    # Sample name
+    if len(vcf.samples) > 1:
+        sys.stderr.write("Error: " + len(vcf.samples) +
+                         " sample detected. This version is designed for a single sample !")
+        sys.exit(-1)
 
+    # Ouptuts
     if args.export:
         wx = cyvcf2.Writer(re.sub(r'\.vcf$|\.vcf.gz$|\.bcf',
                                   '_export.vcf', os.path.basename(args.vcf)), vcf)
-
     if args.debug:
         vcf.add_info_to_header({'ID': 'TMB_FILTERS', 'Description': 'Detected filters for TMB calculation',
                                 'Type': 'Character', 'Number': '1'})
         wd = cyvcf2.Writer(re.sub(r'\.vcf$|\.vcf.gz$|\.bcf',
                                   '_debug.vcf', os.path.basename(args.vcf)), vcf)
 
+    # Load config
     dbFlags = loadConfig(args.dbConfig)
     callerFlags = loadConfig(args.varConfig)
 
-    varCounter = 0
-    varTMB = 0
-
+    # Genome size
     if args.effGenomeSize is None:
         if args.bed is not None:
             effGS = getEffGenomeSizeFromBed(args.bed)
@@ -310,12 +356,16 @@ def argsParse():
     else:
         effGS = args.effGenomeSize
 
+
+    varCounter = 0
+    varNI = 0
+    varTMB = 0   
     for variant in vcf:
         varCounter += 1
         if (varCounter % 1000 == 0 and args.verbose):
             print ("## ", varCounter)
-            # if args.debug and varCounter == 100000:
-            #    sys.exit()
+            if args.debug and varCounter == 1000:
+                sys.exit()
 
         try:
             # All vcf INFO
@@ -328,11 +378,9 @@ def argsParse():
             else:
                 annotInfo = info2dl(variant.INFO)
 
-            # Field separator
-            sep = dbFlags['sep']
-
             # No INFO field
             if dbInfo is None or annotInfo is None:
+                varNI += 1
                 continue
 
             # Indels
@@ -341,64 +389,72 @@ def argsParse():
                 if not args.debug:
                     continue
 
+            # Variant has a QUAL value or not PASS in the FILTER column
+            if args.filterLowQual and (variant.QUAL is not None or variant.FILTER is not None):
+                debugInfo = ",".join([debugInfo, "QUAL"])
+                if not args.debug:
+                    continue
+
+            #######################
+            # FORMAT
+            #######################
+
             # Variant Allele Frequency
             fval = getTag(variant, callerFlags['freq'])
-            if type(fval) == float and fval is not None and fval < args.minVAF:
+            if fval is not None and len(fval[fval < args.vaf]) == len(variant.ALT):
                 debugInfo = ",".join([debugInfo, "VAF"])
                 if not args.debug:
                     continue
 
             # Sequencing Depth
             dval = getTag(variant, callerFlags['depth'])
-            if dval is not None and dval < args.minDepth:
+            if dval is not None and len(dval[dval < args.minDepth]) == len(variant.ALT):
                 debugInfo = ",".join([debugInfo, "DEPTH"])
                 if not args.debug:
                     continue
 
             # Alternative allele Depth
             ad = getTag(variant, callerFlags['altDepth'])
-            if ad is not None and len(ad[0]) == 2 and ad[0][1] < args.minAltDepth:
+            if ad is not None and len(ad[ad < args.minAltDepth]) == len(variant.ALT):
                 debugInfo = ",".join([debugInfo, "ALTDEPTH"])
                 if not args.debug:
                     continue
 
-            # Variant has a QUAL value or not PASS in the FILTER column
-            if args.filterLowQual and (variant.QUAL is not None or variant.FILTER is not None):
-                debugInfo = ",".join([debugInfo, "QUAL"])
-                if not args.debug:
-                    continue
+            ######################
+            # Annotation INFO
+            ######################
 
             # Coding variants
-            if args.filterCoding and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isCoding'], sep=sep):
+            if args.filterCoding and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isCoding'], sep=dbFlags['sep']):
                 debugInfo = ",".join([debugInfo, "CODING"])
                 if not args.debug:
                     continue
 
             # Splice variants
-            if args.filterSplice and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isSplicing'], sep=sep):
+            if args.filterSplice and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isSplicing'], sep=dbFlags['sep']):
                 debugInfo = ",".join([debugInfo, "SPLICING"])
                 if not args.debug:
                     continue
 
             # Non-coding variants
-            if args.filterNonCoding and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isNonCoding'], sep=sep):
+            if args.filterNonCoding and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isNonCoding'], sep=dbFlags['sep']):
                 debugInfo = ",".join([debugInfo, "NONCODING"])
                 if not args.debug:
                     continue
 
             # Synonymous
-            if args.filterSyn and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isSynonymous'], sep=sep):
+            if args.filterSyn and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isSynonymous'], sep=dbFlags['sep']):
                 debugInfo = ",".join([debugInfo, "SYN"])
                 if not args.debug:
                     continue
 
             # Non synonymous
-            if args.filterNonSyn and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isNonSynonymous'], sep=sep):
+            if args.filterNonSyn and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isNonSynonymous'], sep=dbFlags['sep']):
                 debugInfo = ",".join([debugInfo, "NON_SYN"])
                 if not args.debug:
                     continue
 
-            # Hotpost
+            # Hotspot
             if args.filterCancerHotspot:
                 # Flatten list of fields
                 fdb = []
@@ -419,7 +475,7 @@ def argsParse():
                     for x in dbFlags['polymDb'][db]:
                         fdb.append(x)
 
-                if isPolym(variant, infos=dbInfo, flags=fdb, val=args.minMAF):
+                if isPolym(variant, infos=dbInfo, flags=fdb, val=args.maf):
                     debugInfo = ",".join([debugInfo, "POLYM"])
                     if not args.debug:
                         continue
@@ -464,8 +520,8 @@ def argsParse():
     print("")
     print("Filters:")
     print("-------")
-    print("minVAF=", args.minVAF)
-    print("minMAF=", args.minMAF)
+    print("VAF=", args.vaf)
+    print("MAF=", args.maf)
     print("minDepth=", args.minDepth)
     print("minAltDepth=", args.minAltDepth)
     print("filterLowQual=", args.filterLowQual)
@@ -479,6 +535,7 @@ def argsParse():
     print("filterPolym=", args.filterPolym)
     print("")
     print("Total number of variants=", varCounter)
+    print("Non-informative variants=", varNI)
     print("Variants after filters=", varTMB)
     print("Effective Genome Size=", effGS)
     print("")
diff --git a/bin/pyTMB_test.py b/bin/pyTMB_test.py
deleted file mode 100644
index 6c0f6b3..0000000
--- a/bin/pyTMB_test.py
+++ /dev/null
@@ -1,531 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-#
-#  This file is part of pyTMB software.
-#
-#  Copyright (c) 2020 - Institut Curie
-#
-#  File author(s):
-#      Tom Gutman <tom.gutman@curie.fr>,
-#      Nicolas Servant <nicolas.servant@curie.fr>
-#
-#  Distributed under the terms of the CeCILL-B license.
-#  The full license is in the LICENSE file, distributed with this software.
-#
-##############################################################################
-
-__version__ = '1.2.1'
-
-"""
-This script is designed to calculate a TMB score from a VCF file.
-If no filters are specified, all variants will be used.
-
-Let's defined a TMB as a score using PASS, non-synonymous, coding, non polymorphism variants ...
-In this case, a typical usage would be :
-
-python pyTMB.py -i ${VCF} --effGenomeSize 33280000 \
---vaf 0.05 --maf 0.001 --minDepth 20 --minAltDepth 3\
---filterLowQual \
---filterNonCoding \
---filterSyn \
---filterPolym \
---polymDb 1k,gnomad \
---dbConfig ${DB_CONFIG} \
---varConfig ${VAR_CONFIG} > TMB_results.log
-"""
-
-import cyvcf2
-import argparse
-import sys
-import warnings
-import re
-import yaml
-import numpy as np
-import os.path
-from datetime import date
-
-"""
-Load yaml file
-"""
-
-
-def loadConfig(infile):
-
-    with open(infile, 'r') as stream:
-        try:
-            return(yaml.safe_load(stream))
-        except:
-            raise
-
-
-def getMultiAlleleHeader(vcf):
-
-    FORMAT = []
-    INFO = []
-    for h in vcf.header_iter():
-        i = h.info(extra=True)
-        if 'Number' in i.keys() and i['Number'] == 'A':
-            if i['HeaderType'] == 'FORMAT':
-                FORMAT.append(i['ID'])
-            elif i['HeaderType'] == 'INFO':
-                INFO.append(i['ID'])
-    return(dict(FORMAT=FORMAT, INFO=INFO))
-
-
-"""
-Calculate Effective Genome Size from a BED file
-"""
-
-
-def getEffGenomeSizeFromBed(infile, verbose=False):
-
-    if verbose:
-        print("## Loading BED file '", infile, "'...")
-
-    bedhandle = open(infile)
-    effgs = 0
-    nline = 0
-    for line in bedhandle:
-        bedtab = line.strip().split("\t")
-        nline += 1
-        try:
-            chromosome, start, end = bedtab[:3]
-        except ValueError:
-            sys.stderr.write("Error : wrong input format in line", nline, ". Not a BED file !?")
-            sys.exit(-1)
-
-        intl = abs(int(end) - int(start))
-        effgs += intl
-
-    bedhandle.close()
-    return effgs
-
-
-"""
-Check if a variant has the provided annotation flags
-"""
-
-
-def isAnnotatedAs(v, infos, flags, sep):
-
-    # Subset annotation information as list
-    subINFO = subsetINFO(infos, keys=flags.keys())
-    # Compare list variant information and expected list of tags
-    for sub in subINFO:
-        # For all keys
-        for k in flags.keys():
-            # For each value in keys
-            for val in flags[k]:
-                # Use search to deal with multiple annotations
-                for subval in sub[k].split(sep):
-                    if val == subval:
-                        return(True)
-    return(False)
-
-
-"""
-Check if a variant is in a genomeDb with a MAF > val
-"""
-
-
-def isPolym(v, infos, flags, val):
-
-    subINFO = subsetINFO(infos, keys=flags)
-    for key in subINFO:
-        if type(subINFO[key]) is tuple:
-            for i in subINFO[key]:
-                if i is not None and i != ".":
-                    if float(i) >= float(val):
-                        return True
-        elif subINFO[key] is not None and subINFO[key] != ".":
-            if float(subINFO[key]) >= float(val):
-                return True
-    return(False)
-
-
-"""
-Check if a variant is annotated as a cancer hotspot
-"""
-
-
-def isCancerHotspot(v, infos, flags):
-
-    subINFO = subsetINFO(infos, keys=flags)
-    for key in subINFO:
-        if subINFO[key] is not None and subINFO[key] != ".":
-            return True
-    return(False)
-
-
-"""
-Subset the annotation information to a few key values
-"""
-
-
-def subsetINFO(annot, keys):
-
-    if isinstance(annot, list):
-        subsetInfo = []
-        for i in range(0, len(annot)):
-            z = dict((k, annot[i][k]) for k in keys if k in annot[i])
-            if len(z) > 0:
-                subsetInfo.append(z)
-    else:
-        subsetInfo = dict((k, annot[k]) for k in keys if k in annot)
-    return(subsetInfo)
-
-
-"""
-Format the INFO field from snpEff and return a list of dict
-ie. snpEff
-"""
-
-
-def infoTag2dl(INFO):
-
-    if INFO is not None:
-        annotTag = INFO.split(',')
-        annotInfo = []
-        for i in range(0, len(annotTag)):
-            annot = annotTag[i].split('|')
-            dictannot = {i: annot[i] for i in range(0, len(annot))}
-            annotInfo.append(dictannot)
-        return(annotInfo)
-
-
-"""
-Format the INFO field from ANNOVAR and return a list of dict
-ie. annovar
-"""
-
-
-def info2dl(INFO):
-
-    if INFO is not None:
-        return [dict(INFO)]
-
-
-"""
-Get a tag value from either the format field or the info field
-Return a 2D numpy array
-"""
-
-
-def getTag(v, tag):
-
-    # First check in FORMAT field
-    if tag in variant.FORMAT:
-        val = variant.format(tag)
-        # if np.shape(val) == (1, 1) and val < 0:
-        #        val = None
-
-    # Otherwise, check in INFO field
-    if tag not in variant.FORMAT or val is None:
-        val = variant.INFO.get(tag)
-
-    # if np.shape(val) == (1, 1) and val is not None:
-    #    val = float(val)
-
-    if type(val) != np.ndarray:
-        val = np.array([val], float)
-    else:
-        val = val.astype('float')
-
-    return(val)
-
-
-"""
-Parse inputs
-"""
-
-
-def argsParse():
-
-    parser = argparse.ArgumentParser()
-    parser.add_argument("-i", "--vcf", help="Input file (.vcf, .vcf.gz, .bcf)")
-
-    # Configs
-    parser.add_argument("--dbConfig", help="Databases config file",
-                        type=str, default="./config/databases.yml")
-    parser.add_argument("--varConfig", help="Variant calling config file",
-                        type=str, default="./config/calling.yml")
-    parser.add_argument("--sample", help="Specify the sample ID to focus on",
-                        type=str, default=None)
-
-    # Efective genome size
-    parser.add_argument("--effGenomeSize", help="Effective genome size", type=int, default=None)
-    parser.add_argument(
-        "--bed", help="Capture design to use if effGenomeSize is not defined (BED file)", default=None)
-
-    # Thresholds
-    parser.add_argument("--vaf", help="Filter variants with Allelic Ratio < vaf",
-                        type=float, default=0.05)
-    parser.add_argument("--maf", help="Filter variants with MAF > maf", type=float, default=0.001)
-    parser.add_argument(
-        "--minDepth", help="Filter variants with depth < minDepth", type=int, default=5)
-    parser.add_argument(
-        "--minAltDepth", help="Filter variants with alternative allele depth < minAltDepth", type=int, default=3)
-
-    # Which variants to use
-    parser.add_argument("--filterLowQual",
-                        help="Filter low quality (i.e not PASS) variant", action="store_true")
-    parser.add_argument("--filterIndels", help="Filter insertions/deletions", action="store_true")
-    parser.add_argument("--filterCoding", help="Filter Coding variants", action="store_true")
-    parser.add_argument("--filterSplice", help="Filter Splice variants", action="store_true")
-    parser.add_argument("--filterNonCoding", help="Filter Non-coding variants", action="store_true")
-    parser.add_argument("--filterSyn", help="Filter Synonymous variants", action="store_true")
-    parser.add_argument("--filterNonSyn", help="Filter Non-Synonymous variants",
-                        action="store_true")
-    parser.add_argument("--filterCancerHotspot",
-                        help="Filter variants annotated as cancer hotspots", action="store_true")
-    parser.add_argument(
-        "--filterPolym", help="Filter polymorphism variants in genome databases. See --maf", action="store_true")
-    parser.add_argument("--filterRecurrence",
-                        help="Filter on recurrence values", action="store_true")
-
-    # Databases
-    parser.add_argument(
-        "--polymDb", help="Databases used for polymorphisms detection (comma separated)", default="gnomad")
-    parser.add_argument(
-        "--cancerDb", help="Databases used for cancer hotspot annotation (comma separated)", default="cosmic")
-
-    # Others
-    parser.add_argument("--verbose", help="Active verbose mode", action="store_true")
-    parser.add_argument(
-        "--debug", help="Export original VCF with TMB_FILTER tag", action="store_true")
-    parser.add_argument(
-        "--export", help="Export a VCF with the considered variants", action="store_true")
-    parser.add_argument("--version", help="Version number", action='version',
-                        version="%(prog)s ("+__version__+")")
-
-    args = parser.parse_args()
-    return (args)
-
-
-if __name__ == "__main__":
-
-    args = argsParse()
-
-    # Loading Data
-    if args.sample is not None:
-        vcf = cyvcf2.VCF(args.vcf, samples=args.sample)
-    else:
-        vcf = cyvcf2.VCF(args.vcf)
-
-    # maHeader = getMultiAlleleHeader(vcf)
-
-    if len(vcf.samples) > 1:
-        sys.stderr.write("Error: " + len(vcf.samples) +
-                         " sample detected. This version is designed for a single sample !")
-        sys.exit(-1)
-
-    if args.export:
-        wx = cyvcf2.Writer(re.sub(r'\.vcf$|\.vcf.gz$|\.bcf',
-                                  '_export.vcf', os.path.basename(args.vcf)), vcf)
-
-    if args.debug:
-        vcf.add_info_to_header({'ID': 'TMB_FILTERS', 'Description': 'Detected filters for TMB calculation',
-                                'Type': 'Character', 'Number': '1'})
-        wd = cyvcf2.Writer(re.sub(r'\.vcf$|\.vcf.gz$|\.bcf',
-                                  '_debug.vcf', os.path.basename(args.vcf)), vcf)
-
-    dbFlags = loadConfig(args.dbConfig)
-    callerFlags = loadConfig(args.varConfig)
-
-    varCounter = 0
-    varNI = 0
-    varTMB = 0
-
-    if args.effGenomeSize is None:
-        if args.bed is not None:
-            effGS = getEffGenomeSizeFromBed(args.bed)
-        else:
-            sys.stderr.write(
-                "Error: Effective Genome Size not specified. See --effGenomeSize or --bed")
-            sys.exit(-1)
-    else:
-        effGS = args.effGenomeSize
-
-    for variant in vcf:
-        varCounter += 1
-        if (varCounter % 1000 == 0 and args.verbose):
-            print ("## ", varCounter)
-            if args.debug and varCounter == 1000:
-                sys.exit()
-
-        try:
-            # if len(variant.ALT) == 1:
-            #    continue
-
-            # All vcf INFO
-            dbInfo = dict(variant.INFO)
-            debugInfo = ""
-
-            # Get annotation INFO as a list of dict
-            if dbFlags['tag'] != '':
-                annotInfo = infoTag2dl(variant.INFO.get(dbFlags['tag']))
-            else:
-                annotInfo = info2dl(variant.INFO)
-
-            # No INFO field
-            if dbInfo is None or annotInfo is None:
-                varNI += 1
-                continue
-
-            # Indels
-            if args.filterIndels and variant.is_indel:
-                debugInfo = ",".join([debugInfo, "INDEL"])
-                if not args.debug:
-                    continue
-
-            # Variant has a QUAL value or not PASS in the FILTER column
-            if args.filterLowQual and (variant.QUAL is not None or variant.FILTER is not None):
-                debugInfo = ",".join([debugInfo, "QUAL"])
-                if not args.debug:
-                    continue
-
-            #######################
-            # FORMAT
-            #######################
-
-            # Variant Allele Frequency
-            fval = getTag(variant, callerFlags['freq'])
-            if fval is not None and len(fval[fval < args.vaf]) == len(variant.ALT):
-                debugInfo = ",".join([debugInfo, "VAF"])
-                if not args.debug:
-                    continue
-
-            # Sequencing Depth
-            dval = getTag(variant, callerFlags['depth'])
-            if dval is not None and len(dval[dval < args.minDepth]) == len(variant.ALT):
-                debugInfo = ",".join([debugInfo, "DEPTH"])
-                if not args.debug:
-                    continue
-
-            # Alternative allele Depth
-            ad = getTag(variant, callerFlags['altDepth'])
-            if ad is not None and len(ad[ad < args.minAltDepth]) == len(variant.ALT):
-                debugInfo = ",".join([debugInfo, "ALTDEPTH"])
-                if not args.debug:
-                    continue
-
-            ######################
-            # Annotation INFO
-            ######################
-
-            # Coding variants
-            if args.filterCoding and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isCoding'], sep=dbFlags['sep']):
-                debugInfo = ",".join([debugInfo, "CODING"])
-                if not args.debug:
-                    continue
-
-            # Splice variants
-            if args.filterSplice and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isSplicing'], sep=dbFlags['sep']):
-                debugInfo = ",".join([debugInfo, "SPLICING"])
-                if not args.debug:
-                    continue
-
-            # Non-coding variants
-            if args.filterNonCoding and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isNonCoding'], sep=dbFlags['sep']):
-                debugInfo = ",".join([debugInfo, "NONCODING"])
-                if not args.debug:
-                    continue
-
-            # Synonymous
-            if args.filterSyn and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isSynonymous'], sep=dbFlags['sep']):
-                debugInfo = ",".join([debugInfo, "SYN"])
-                if not args.debug:
-                    continue
-
-            # Non synonymous
-            if args.filterNonSyn and isAnnotatedAs(variant, infos=annotInfo, flags=dbFlags['isNonSynonymous'], sep=dbFlags['sep']):
-                debugInfo = ",".join([debugInfo, "NON_SYN"])
-                if not args.debug:
-                    continue
-
-            # Hotspot
-            if args.filterCancerHotspot:
-                # Flatten list of fields
-                fdb = []
-                for db in args.cancerDb.split(','):
-                    for x in dbFlags['cancerDb'][db]:
-                        fdb.append(x)
-
-                if isCancerHotspot(variant, infos=dbInfo, flags=fdb):
-                    debugInfo = ",".join([debugInfo, "HOTSPOT"])
-                    if not args.debug:
-                        continue
-
-            # Polymorphisms
-            if args.filterPolym:
-                # Flatten list of fields
-                fdb = []
-                for db in args.polymDb.split(','):
-                    for x in dbFlags['polymDb'][db]:
-                        fdb.append(x)
-
-                if isPolym(variant, infos=dbInfo, flags=fdb, val=args.maf):
-                    debugInfo = ",".join([debugInfo, "POLYM"])
-                    if not args.debug:
-                        continue
-
-            # Recurrence
-            if args.filterRecurrence:
-                if isPolym(variant, infos=dbInfo, flags=dbFlags['recurrence']['run'], val=0):
-                    debugInfo = ",".join([debugInfo, "RUNREC"])
-                    if not args.debug:
-                        continue
-
-        except:
-            warnflag = str(variant.CHROM) + ":" + str(variant.start) + "-" + str(variant.end)
-            warnings.warn("Warning : variant ", warnflag, " raises an error. Skipped so far ...")
-            raise
-
-        # Still alive
-        if debugInfo == "":
-            varTMB += 1
-            if args.export:
-                wx.write_record(variant)
-
-        if args.debug:
-            variant.INFO["TMB_FILTERS"] = re.sub(r'^,', '', debugInfo)
-            wd.write_record(variant)
-
-    if args.export:
-        wx.close()
-    if args.debug:
-        wd.close()
-    vcf.close()
-
-    # Calculate TMB
-    TMB = round(float(varTMB)/(float(effGS)/1e6), 2)
-
-    # Output
-    print("pyTMB version=", __version__)
-    print("When=", date.today())
-    print("")
-    print("Config caller=", args.varConfig)
-    print("Config databases=", args.dbConfig)
-    print("")
-    print("Filters:")
-    print("-------")
-    print("VAF=", args.vaf)
-    print("MAF=", args.maf)
-    print("minDepth=", args.minDepth)
-    print("minAltDepth=", args.minAltDepth)
-    print("filterLowQual=", args.filterLowQual)
-    print("filterIndels=", args.filterIndels)
-    print("filterCoding=", args.filterCoding)
-    print("filterNonCoding=", args.filterNonCoding)
-    print("filterSplice=", args.filterSplice)
-    print("filterSyn=", args.filterSyn)
-    print("filterNonSyn=", args.filterNonSyn)
-    print("filterConcerHostpot=", args.filterCancerHotspot)
-    print("filterPolym=", args.filterPolym)
-    print("")
-    print("Total number of variants=", varCounter)
-    print("Non-informative variants=", varNI)
-    print("Variants after filters=", varTMB)
-    print("Effective Genome Size=", effGS)
-    print("")
-    print("TMB=", TMB)
diff --git a/test-op/dragon/run.sh b/test-op/dragon/run.sh
index 8df3f37..90cc0a8 100644
--- a/test-op/dragon/run.sh
+++ b/test-op/dragon/run.sh
@@ -1,60 +1,61 @@
-# TMB1 : Basic SNVs only 
+# TMB1 : Basic SNVs splicing
 # TMB2 : SNVs+Indels PASS - splicing - Saved = Basic
 # TMB3 : SNVs PASS - splicing - Saved + 10% allelic ratio
 # TMB4 : SNVs+Indels PASS - splicing - Saved + 10% allelic ratio
 
 ## Filters: nonCoding, splicing, syn, polym, recurrent, low_depth
-configs="--dbConfig /data/users/nservant/GitLab/tmb/config/annovar.yml --varConfig /data/users/nservant/GitLab/tmb/config/varscan2.yml"
-filters="--minDepth 100 --filterNonCoding --filterSplice --filterSyn --filterPolym --filterLowQual --filterRecurrence --minMAF 0.001 --polymDb 1k,gnomad,esp,exac --effGenomeSize 1590000"
+configs="--dbConfig /data/users/nservant/GitLab/tmb/config/annovar.yml --varConfig /data/users/nservant/GitLab/tmb/config/varscan2.yml --export --debug"
+filters="--minDepth 100 --filterNonCoding --filterSplice --filterSyn --filterPolym --filterLowQual --filterRecurrence --maf 0.001 --polymDb 1k,gnomad,esp,exac --effGenomeSize 1590000"
 
-RUN=D320
+RUN=D326
+IDIR=/data/kdi_prod/dataset_all/2011015/backup/
 echo -e "Barcode\tNb_Mut1\tTMB1\tNb_Mut2\tTMB2\tNb_Mut3\tTMB3\tNb_Mut4\tTMB4" > TMB_${RUN}_results.tsv
-for i in $(find /data/tmp/egirard/dragon_tests/D320/ANALYSIS/ -maxdepth 1 -type d -name "${RUN}*")
+for i in $(find ${IDIR} -maxdepth 1 -type d -name "${RUN}*")
 do
     SAMPLE=$(basename $i)
-    VCF=/data/tmp/egirard/dragon_tests/${RUN}/ANALYSIS/${SAMPLE}/CALLING/${SAMPLE}.hg19_multianno.vcf
-    REC=/data/tmp/egirard/dragon_tests/${RUN}/ANALYSIS/RESULTS/VARIANTS/${RUN}_table_report_tagged_tmb_final.tsv
+    VCF=${IDIR}/${SAMPLE}/CALLING/${SAMPLE}.hg19_multianno.vcf
+    REC=${IDIR}/RESULTS/VARIANTS/${RUN}_table_report_tagged_tmb_final.tsv
 
     echo ${SAMPLE}
 
-    awk -v sample=${SAMPLE} '$1==sample{print}' ${REC} > ${SAMPLE}_table_report_rec.tsv
+    #awk -v sample=${SAMPLE} '$1==sample{print}' ${REC} > ${SAMPLE}_table_report_rec.tsv
     cmd="time python addRec.py -i ${VCF} -r ${SAMPLE}_table_report_rec.tsv -o ${SAMPLE}_rec.vcf"
     echo $cmd
-    eval $cmd
+    #eval $cmd
 
     IVCF="${SAMPLE}_rec.vcf"
     OFILE=$(basename $VCF | sed -e 's/.hg19_multianno.vcf//')
 
     # TMB1: 
-    cmd="time python ../../bin/pyTMB.py -i ${IVCF} --filterIndels --minVAF 5 ${filters} ${configs}"
-    echo $cmd > ${OFILE}_TMB1.txt
+    cmd="time python ../../bin/pyTMB.py -i ${IVCF} --sample ${SAMPLE} --filterIndels --vaf 5 ${filters} ${configs}"
+    echo $cmd
     eval $cmd >> ${OFILE}_TMB1.txt
     nbvar1=$(grep "Variants after filters=" ${OFILE}_TMB1.txt | awk -F"=" '{print $2}')
     tmb1=$(grep "TMB=" ${OFILE}_TMB1.txt | awk -F"=" '{print $2}')
     
     # TMB2:
-    cmd="time python ../../bin/pyTMB.py -i ${IVCF} --minVAF 5 ${filters} ${configs}"
-    echo $cmd > ${OFILE}_TMB2.txt
+    cmd="time python ../../bin/pyTMB.py -i ${IVCF} --sample ${SAMPLE} --vaf 5 ${filters} ${configs}"
+    echo $cmd
     eval $cmd >> ${OFILE}_TMB2.txt
     nbvar2=$(grep "Variants after filters=" ${OFILE}_TMB2.txt | awk -F"=" '{print $2}')
     tmb2=$(grep "TMB=" ${OFILE}_TMB2.txt | awk -F"=" '{print $2}')
     
     # TMB3
-    cmd="time python ../../bin/pyTMB.py -i ${IVCF} --filterIndels --minVAF 10 ${filters} ${configs}"
-    echo $cmd > ${OFILE}_TMB3.txt
+    cmd="time python ../../bin/pyTMB.py -i ${IVCF} --sample ${SAMPLE} --filterIndels --vaf 10 ${filters} ${configs}"
+    echo $cmd
     eval $cmd >> ${OFILE}_TMB3.txt
     nbvar3=$(grep "Variants after filters=" ${OFILE}_TMB3.txt | awk -F"=" '{print $2}')
     tmb3=$(grep "TMB=" ${OFILE}_TMB3.txt | awk -F"=" '{print $2}')
     
     # TMB4
-    cmd="time python ../../bin/pyTMB.py -i ${IVCF} --minVAF 10 ${filters} ${configs}"
-    echo $cmd > ${OFILE}_TMB4.txt
+    cmd="time python ../../bin/pyTMB.py -i ${IVCF} --sample ${SAMPLE} --vaf 10 ${filters} ${configs}"
+    echo $cmd
     eval $cmd >> ${OFILE}_TMB4.txt
     nbvar4=$(grep "Variants after filters=" ${OFILE}_TMB4.txt | awk -F"=" '{print $2}')
     tmb4=$(grep "TMB=" ${OFILE}_TMB4.txt | awk -F"=" '{print $2}')
     
     cmd="rm ${SAMPLE}_table_report_rec.tsv ${SAMPLE}_rec.vcf *.txt"
-    eval $cmd
+    #eval $cmd
     
     echo -e "${SAMPLE}\t${nbvar1}\t${tmb1}\t${nbvar2}\t${tmb2}\t${nbvar3}\t${tmb3}\t${nbvar3}\t${tmb3}" >> TMB_${RUN}_results.tsv
 done

From 48aad6698e3086c8ca322cdaa479ccc47a033db1 Mon Sep 17 00:00:00 2001
From: nservant <nicolas.servant@curie.fr>
Date: Fri, 15 May 2020 09:18:35 +0200
Subject: [PATCH 14/15] [BUMP] version 1.2.0

---
 README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 4b66426..16cd483 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@ The TMB is defined as the number of variants over the size of the genomic region
 python bin/pyTMB.py --help
 usage: pyTMB.py [-h] [-i VCF] [--dbConfig DBCONFIG] [--varConfig VARCONFIG]
                 [--effGenomeSize EFFGENOMESIZE] [--bed BED]
-                [--vaf MINVAF] [--maf MINMAF] [--minDepth MINDEPTH] [--minAltDepth MINALTDEPTH]
+                [--vaf MINVAF] [--maf MAXMAF] [--minDepth MINDEPTH] [--minAltDepth MINALTDEPTH]
                 [--filterLowQual] [--filterIndels] [--filterCoding]
                 [--filterSplice] [--filterNonCoding] [--filterSyn]
                 [--filterNonSyn] [--filterCancerHotspot] [--filterPolym]
@@ -52,8 +52,8 @@ optional arguments:
   --effGenomeSize EFFGENOMESIZE       Effective genome size
   --bed BED                           Capture design to use if effGenomeSize is not defined (BED file)
 
-  --minVAF MINVAF                     Filter variants with Allelic Ratio < minVAF
-  --minMAF MINMAF                     Filter variants with MAF < minMAF
+  --vaf MINVAF                        Filter variants with Allelic Ratio < minVAF
+  --maf MINMAF                        Filter variants with MAF < maf
   --minDepth MINDEPTH                 Filter variants with depth < minDepth
   --minAltDepth MINALTDEPTH           FIlter alternative allele with depth < minAltDepth
   --filterLowQual                     Filter low quality (i.e not PASS) variant
@@ -125,8 +125,8 @@ The same is true for the `--cancerDb` parameter.
 Filter variants with Allelic Ratio < minVAF. Note the field used to get the Allelic Ratio field is defined in the *conf/caller.yml* file.
 In this case, the programm will first look for this information in the **FORMAT** field, and then in the **INFO** field.
 
-#### `--maf MINMAF`
-Filter variants with MAF < minMAF. Note the databases used to check the Min Allele Frequency are set using the `--polymDb`
+#### `--maf MAXMAF`
+Filter variants with MAF < maf. Note the databases used to check the Min Allele Frequency are set using the `--polymDb`
 parameters and the *conf/databases.yml* file.
 
 #### `--minDepth MINDEPTH`

From fb8cce1a26c9aa8741d074e7de26d5a428d62b22 Mon Sep 17 00:00:00 2001
From: nservant <nicolas.servant@curie.fr>
Date: Fri, 15 May 2020 17:36:26 +0200
Subject: [PATCH 15/15] [BUMP] v1.2.0

---
 bin/pyTMB.py                                  | 85 +++++++------------
 test-op/dragon/D320_tmb_results_DRAGON.tsv    | 33 -------
 test-op/dragon/TMB_D320_results.tsv           | 33 -------
 .../results/D326_tmb_results_DRAGON.tsv       | 27 ++++++
 .../results/TMB_D326_results_v1.2.0.tsv       |  1 +
 test-op/dragon/run.sh                         |  6 +-
 6 files changed, 63 insertions(+), 122 deletions(-)
 delete mode 100644 test-op/dragon/D320_tmb_results_DRAGON.tsv
 delete mode 100644 test-op/dragon/TMB_D320_results.tsv
 create mode 100644 test-op/dragon/results/D326_tmb_results_DRAGON.tsv
 create mode 100644 test-op/dragon/results/TMB_D326_results_v1.2.0.tsv

diff --git a/bin/pyTMB.py b/bin/pyTMB.py
index 11a7ad1..a9804d8 100644
--- a/bin/pyTMB.py
+++ b/bin/pyTMB.py
@@ -250,66 +250,41 @@ def argsParse():
     parser.add_argument("-i", "--vcf", help="Input file (.vcf, .vcf.gz, .bcf)")
 
     # Configs
-    parser.add_argument("--dbConfig", 
-                        help="Databases config file", type=str, default="./config/databases.yml")
-    parser.add_argument("--varConfig", 
-                        help="Variant calling config file", type=str, default="./config/calling.yml")
-    parser.add_argument("--sample", 
-                        help="Specify the sample ID to focus on", type=str, default=None)
+    parser.add_argument("--dbConfig", help="Databases config file", type=str, default="./config/databases.yml")
+    parser.add_argument("--varConfig", help="Variant calling config file", type=str, default="./config/calling.yml")
+    parser.add_argument("--sample", help="Specify the sample ID to focus on", type=str, default=None)
 
     # Efective genome size
-    parser.add_argument("--effGenomeSize", 
-                        help="Effective genome size", type=int, default=None)
-    parser.add_argument("--bed", 
-                        help="Capture design to use if effGenomeSize is not defined (BED file)", default=None)
+    parser.add_argument("--effGenomeSize", help="Effective genome size", type=int, default=None)
+    parser.add_argument("--bed", help="Capture design to use if effGenomeSize is not defined (BED file)", default=None)
 
     # Thresholds
-    parser.add_argument("--vaf", 
-                        help="Filter variants with Allelic Ratio < vaf", type=float, default=0.05)
-    parser.add_argument("--maf", 
-                        help="Filter variants with MAF > maf", type=float, default=0.001)
-    parser.add_argument("--minDepth", 
-                        help="Filter variants with depth < minDepth", type=int, default=5)
-    parser.add_argument("--minAltDepth", 
-                        help="Filter variants with alternative allele depth < minAltDepth", type=int, default=3)
+    parser.add_argument("--vaf", help="Filter variants with Allelic Ratio <= vaf", type=float, default=0.05)
+    parser.add_argument("--maf", help="Filter variants with MAF > maf", type=float, default=0.001)
+    parser.add_argument("--minDepth", help="Filter variants with depth < minDepth", type=int, default=5)
+    parser.add_argument("--minAltDepth", help="Filter variants with alternative allele depth <= minAltDepth", type=int, default=2)
 
     # Which variants to use
-    parser.add_argument("--filterLowQual",
-                        help="Filter low quality (i.e not PASS) variant", action="store_true")
-    parser.add_argument("--filterIndels", 
-                        help="Filter insertions/deletions", action="store_true")
-    parser.add_argument("--filterCoding", 
-                        help="Filter Coding variants", action="store_true")
-    parser.add_argument("--filterSplice", 
-                        help="Filter Splice variants", action="store_true")
-    parser.add_argument("--filterNonCoding", 
-                        help="Filter Non-coding variants", action="store_true")
-    parser.add_argument("--filterSyn", 
-                        help="Filter Synonymous variants", action="store_true")
-    parser.add_argument("--filterNonSyn", 
-                        help="Filter Non-Synonymous variants", action="store_true")
-    parser.add_argument("--filterCancerHotspot",
-                        help="Filter variants annotated as cancer hotspots", action="store_true")
-    parser.add_argument("--filterPolym", 
-                        help="Filter polymorphism variants in genome databases. See --maf", action="store_true")
-    parser.add_argument("--filterRecurrence",
-                        help="Filter on recurrence values", action="store_true")
+    parser.add_argument("--filterLowQual", help="Filter low quality (i.e not PASS) variant", action="store_true")
+    parser.add_argument("--filterIndels", help="Filter insertions/deletions", action="store_true")
+    parser.add_argument("--filterCoding", help="Filter Coding variants", action="store_true")
+    parser.add_argument("--filterSplice", help="Filter Splice variants", action="store_true")
+    parser.add_argument("--filterNonCoding", help="Filter Non-coding variants", action="store_true")
+    parser.add_argument("--filterSyn", help="Filter Synonymous variants", action="store_true")
+    parser.add_argument("--filterNonSyn", help="Filter Non-Synonymous variants", action="store_true")
+    parser.add_argument("--filterCancerHotspot", help="Filter variants annotated as cancer hotspots", action="store_true")
+    parser.add_argument("--filterPolym", help="Filter polymorphism variants in genome databases. See --maf", action="store_true")
+    parser.add_argument("--filterRecurrence", help="Filter on recurrence values", action="store_true")
 
     # Databases
-    parser.add_argument("--polymDb", 
-                        help="Databases used for polymorphisms detection (comma separated)", default="gnomad")
-    parser.add_argument("--cancerDb", 
-                        help="Databases used for cancer hotspot annotation (comma separated)", default="cosmic")
+    parser.add_argument("--polymDb", help="Databases used for polymorphisms detection (comma separated)", default="gnomad")
+    parser.add_argument("--cancerDb", help="Databases used for cancer hotspot annotation (comma separated)", default="cosmic")
 
     # Others
-    parser.add_argument("--verbose", 
-                        help="Active verbose mode", action="store_true")
-    parser.add_argument("--debug", 
-                        help="Export original VCF with TMB_FILTER tag", action="store_true")
-    parser.add_argument("--export", 
-                        help="Export a VCF with the considered variants", action="store_true")
-    parser.add_argument("--version", 
-                        help="Version number", action='version', version="%(prog)s ("+__version__+")")
+    parser.add_argument("--verbose", help="Active verbose mode", action="store_true")
+    parser.add_argument("--debug", help="Export original VCF with TMB_FILTER tag", action="store_true")
+    parser.add_argument("--export", help="Export a VCF with the considered variants", action="store_true")
+    parser.add_argument("--version", help="Version number", action='version', version="%(prog)s ("+__version__+")")
 
     args = parser.parse_args()
     return (args)
@@ -401,21 +376,25 @@ def argsParse():
 
             # Variant Allele Frequency
             fval = getTag(variant, callerFlags['freq'])
-            if fval is not None and len(fval[fval < args.vaf]) == len(variant.ALT):
+            if fval is not None and len(fval[fval <= args.vaf]) == len(variant.ALT):
                 debugInfo = ",".join([debugInfo, "VAF"])
                 if not args.debug:
                     continue
 
             # Sequencing Depth
             dval = getTag(variant, callerFlags['depth'])
-            if dval is not None and len(dval[dval < args.minDepth]) == len(variant.ALT):
+            if dval is not None and len(dval[dval <= args.minDepth]) == len(variant.ALT):
                 debugInfo = ",".join([debugInfo, "DEPTH"])
                 if not args.debug:
                     continue
 
             # Alternative allele Depth
             ad = getTag(variant, callerFlags['altDepth'])
-            if ad is not None and len(ad[ad < args.minAltDepth]) == len(variant.ALT):
+            # case where AD = REF + ALTs
+            if len(ad) == (len(variant.ALT) + 1):
+                ad = ad[1:]
+
+            if ad is not None and len(ad[ad <= args.minAltDepth]) == len(variant.ALT):
                 debugInfo = ",".join([debugInfo, "ALTDEPTH"])
                 if not args.debug:
                     continue
diff --git a/test-op/dragon/D320_tmb_results_DRAGON.tsv b/test-op/dragon/D320_tmb_results_DRAGON.tsv
deleted file mode 100644
index 0eb3853..0000000
--- a/test-op/dragon/D320_tmb_results_DRAGON.tsv
+++ /dev/null
@@ -1,33 +0,0 @@
-Barcode	Nb_Mut1	TMB1	Nb_Mut2	TMB2	Nb_Mut3	TMB3	Nb_Mut4	TMB4	Nb_Mut1_woMatch	TMB1_woMatch	Nb_Mut2_woMatch	TMB2_woMatch	Nb_Mut3_woMatch	TMB3_woMatch	Nb_Mut4_woMatch	TMB4_woMatch
-D320R01	13	8.19	14	8.82	9	5.67	9	5.67	0	0	0	0	0	0	0	0
-D320R02	18	11.34	20	12.6	11	6.93	12	7.56	0	0	0	0	0	0	0	0
-D320R03	12	7.56	14	8.82	11	6.93	12	7.56	0	0	0	0	0	0	0	0
-D320R04	434	273.52	448	282.34	43	27.1	46	28.99	0	0	0	0	0	0	0	0
-D320R05	15	9.45	16	10.08	6	3.78	6	3.78	0	0	0	0	0	0	0	0
-D320R06	1310	825.6	1345	847.66	488	307.55	501	315.74	0	0	0	0	0	0	0	0
-D320R07	15	9.45	18	11.34	11	6.93	12	7.56	0	0	0	0	0	0	0	0
-D320R08	57	35.92	60	37.81	31	19.54	32	20.17	0	0	0	0	0	0	0	0
-D320R09	7	4.41	9	5.67	6	3.78	6	3.78	0	0	0	0	0	0	0	0
-D320R10	13	8.19	14	8.82	9	5.67	9	5.67	0	0	0	0	0	0	0	0
-D320R11	17	10.71	19	11.97	4	2.52	5	3.15	0	0	0	0	0	0	0	0
-D320R12	38	23.95	41	25.84	6	3.78	6	3.78	0	0	0	0	0	0	0	0
-D320R13	115	72.48	131	82.56	32	20.17	45	28.36	0	0	0	0	0	0	0	0
-D320R14	391	246.42	404	254.61	62	39.07	66	41.6	0	0	0	0	0	0	0	0
-D320R15	190	119.74	197	124.16	24	15.13	27	17.02	0	0	0	0	0	0	0	0
-D320R16	774	487.8	792	499.14	442	278.56	455	286.75	0	0	0	0	0	0	0	0
-D320R17	1802	1135.67	1843	1161.51	680	428.56	701	441.79	0	0	0	0	0	0	0	0
-D320R18	12	7.56	15	9.45	8	5.04	10	6.3	0	0	0	0	0	0	0	0
-D320R19	426	268.48	436	274.78	68	42.86	74	46.64	0	0	0	0	0	0	0	0
-D320R20	39	24.58	40	25.21	29	18.28	30	18.91	0	0	0	0	0	0	0	0
-D320R21	200	126.05	202	127.31	133	83.82	135	85.08	0	0	0	0	0	0	0	0
-D320R22	836	526.87	842	530.65	347	218.69	349	219.95	0	0	0	0	0	0	0	0
-D320R23	97	61.13	101	63.65	10	6.3	13	8.19	0	0	0	0	0	0	0	0
-D320R24	112	70.59	114	71.85	15	9.45	15	9.45	0	0	0	0	0	0	0	0
-D320R25	304	191.59	305	192.22	62	39.07	62	39.07	0	0	0	0	0	0	0	0
-D320R26	1086	684.43	1101	693.88	240	151.25	242	152.52	0	0	0	0	0	0	0	0
-D320R27	52	32.77	54	34.03	10	6.3	10	6.3	0	0	0	0	0	0	0	0
-D320R28	106	66.8	109	68.69	6	3.78	6	3.78	0	0	0	0	0	0	0	0
-D320R29	793	499.77	811	511.12	54	34.03	57	35.92	0	0	0	0	0	0	0	0
-D320R30	109	68.69	113	71.22	15	9.45	16	10.08	0	0	0	0	0	0	0	0
-D320R31	343	216.17	348	219.32	33	20.8	33	20.8	0	0	0	0	0	0	0	0
-D320R32	205	129.2	207	130.46	10	6.3	11	6.93	0	0	0	0	0	0	0	0
diff --git a/test-op/dragon/TMB_D320_results.tsv b/test-op/dragon/TMB_D320_results.tsv
deleted file mode 100644
index 318ca87..0000000
--- a/test-op/dragon/TMB_D320_results.tsv
+++ /dev/null
@@ -1,33 +0,0 @@
-Barcode	Nb_Mut1	TMB1	Nb_Mut2	TMB2	Nb_Mut3	TMB3	Nb_Mut4	TMB4
-D320R01	 10	 6.29	 11	 6.92	 8	 5.03	 8	 5.03
-D320R02	 13	 8.18	 17	 10.69	 10	 6.29	 10	 6.29
-D320R03	 11	 6.92	 15	 9.43	 10	 6.29	 10	 6.29
-D320R04	 404	 254.09	 416	 261.64	 37	 23.27	 37	 23.27
-D320R05	 8	 5.03	 9	 5.66	 6	 3.77	 6	 3.77
-D320R06	 1249	 785.53	 1279	 804.4	 472	 296.86	 472	 296.86
-D320R07	 19	 11.95	 22	 13.84	 17	 10.69	 17	 10.69
-D320R08	 58	 36.48	 61	 38.36	 36	 22.64	 36	 22.64
-D320R09	 6	 3.77	 9	 5.66	 5	 3.14	 5	 3.14
-D320R10	 15	 9.43	 16	 10.06	 13	 8.18	 13	 8.18
-D320R11	 21	 13.21	 23	 14.47	 11	 6.92	 11	 6.92
-D320R12	 23	 14.47	 26	 16.35	 7	 4.4	 7	 4.4
-D320R13	 85	 53.46	 103	 64.78	 37	 23.27	 37	 23.27
-D320R14	 367	 230.82	 379	 238.36	 62	 38.99	 62	 38.99
-D320R15	 165	 103.77	 170	 106.92	 24	 15.09	 24	 15.09
-D320R16	 748	 470.44	 767	 482.39	 427	 268.55	 427	 268.55
-D320R17	 1710	 1075.47	 1751	 1101.26	 640	 402.52	 640	 402.52
-D320R18	 8	 5.03	 11	 6.92	 8	 5.03	 8	 5.03
-D320R19	 389	 244.65	 398	 250.31	 66	 41.51	 66	 41.51
-D320R20	 41	 25.79	 42	 26.42	 30	 18.87	 30	 18.87
-D320R21	 192	 120.75	 194	 122.01	 124	 77.99	 124	 77.99
-D320R22	 791	 497.48	 795	 500.0	 323	 203.14	 323	 203.14
-D320R23	 86	 54.09	 89	 55.97	 9	 5.66	 9	 5.66
-D320R24	 94	 59.12	 96	 60.38	 11	 6.92	 11	 6.92
-D320R25	 369	 232.08	 370	 232.7	 70	 44.03	 70	 44.03
-D320R26	 1181	 742.77	 1196	 752.2	 261	 164.15	 261	 164.15
-D320R27	 36	 22.64	 40	 25.16	 7	 4.4	 7	 4.4
-D320R28	 91	 57.23	 93	 58.49	 10	 6.29	 10	 6.29
-D320R29	 763	 479.87	 776	 488.05	 50	 31.45	 50	 31.45
-D320R30	 80	 50.31	 85	 53.46	 13	 8.18	 13	 8.18
-D320R31	 276	 173.58	 281	 176.73	 31	 19.5	 31	 19.5
-D320R32	 215	 135.22	 217	 136.48	 10	 6.29	 10	 6.29
diff --git a/test-op/dragon/results/D326_tmb_results_DRAGON.tsv b/test-op/dragon/results/D326_tmb_results_DRAGON.tsv
new file mode 100644
index 0000000..d49fe06
--- /dev/null
+++ b/test-op/dragon/results/D326_tmb_results_DRAGON.tsv
@@ -0,0 +1,27 @@
+Barcode	Nb_Mut1	TMB1	Nb_Mut2	TMB2	Nb_Mut3	TMB3	Nb_Mut4	TMB4	Nb_Mut1_woMatch	TMB1_woMatch	Nb_Mut2_woMatch	TMB2_woMatch	Nb_Mut3_woMatch	TMB3_woMatch	Nb_Mut4_woMatch	TMB4_woMatch
+D326R01	10	6.3	12	7.56	8	5.04	8	5.04	0	0	0	0	0	0	0	0
+D326R02	13	8.19	14	8.82	9	5.67	9	5.67	0	0	0	0	0	0	0	0
+D326R03	11	6.93	13	8.19	7	4.41	7	4.41	0	0	0	0	0	0	0	0
+D326R04	13	8.19	14	8.82	10	6.3	10	6.3	0	0	0	0	0	0	0	0
+D326R05	136	85.71	141	88.86	14	8.82	14	8.82	0	0	0	0	0	0	0	0
+D326R06	662	417.21	679	427.93	81	51.05	82	51.68	0	0	0	0	0	0	0	0
+D326R07	10	6.3	12	7.56	4	2.52	4	2.52	0	0	0	0	0	0	0	0
+D326R08	12	7.56	17	10.71	8	5.04	10	6.3	0	0	0	0	0	0	0	0
+D326R09	10	6.3	13	8.19	7	4.41	8	5.04	0	0	0	0	0	0	0	0
+D326R10	12	7.56	13	8.19	9	5.67	9	5.67	0	0	0	0	0	0	0	0
+D326R11	10	6.3	12	7.56	8	5.04	9	5.67	0	0	0	0	0	0	0	0
+D326R12	57	35.92	63	39.7	31	19.54	32	20.17	0	0	0	0	0	0	0	0
+D326R13	17	10.71	20	12.6	9	5.67	10	6.3	0	0	0	0	0	0	0	0
+D326R14	12	7.56	13	8.19	8	5.04	8	5.04	0	0	0	0	0	0	0	0
+D326R15	26	16.39	28	17.65	16	10.08	16	10.08	0	0	0	0	0	0	0	0
+D326R16	7	4.41	9	5.67	5	3.15	5	3.15	0	0	0	0	0	0	0	0
+D326R17	17	10.71	20	12.6	10	6.3	12	7.56	17	10.71	20	12.6	10	6.3	12	7.56
+D326R18	15	9.45	17	10.71	9	5.67	10	6.3	0	0	0	0	0	0	0	0
+D326R19	20	12.6	24	15.13	15	9.45	18	11.34	0	0	0	0	0	0	0	0
+D326R20	16	10.08	18	11.34	13	8.19	13	8.19	0	0	0	0	0	0	0	0
+D326R21	14	8.82	17	10.71	8	5.04	8	5.04	0	0	0	0	0	0	0	0
+D326R22	11	6.93	13	8.19	7	4.41	8	5.04	0	0	0	0	0	0	0	0
+D326R23	13	8.19	16	10.08	9	5.67	11	6.93	0	0	0	0	0	0	0	0
+D326R24	13	8.19	14	8.82	10	6.3	10	6.3	0	0	0	0	0	0	0	0
+D326R25	6	3.78	8	5.04	4	2.52	5	3.15	0	0	0	0	0	0	0	0
+D326R26	4	2.52	6	3.78	4	2.52	5	3.15	0	0	0	0	0	0	0	0
diff --git a/test-op/dragon/results/TMB_D326_results_v1.2.0.tsv b/test-op/dragon/results/TMB_D326_results_v1.2.0.tsv
new file mode 100644
index 0000000..838cb1c
--- /dev/null
+++ b/test-op/dragon/results/TMB_D326_results_v1.2.0.tsv
@@ -0,0 +1 @@
+Barcode	Nb_Mut1	TMB1	Nb_Mut2	TMB2	Nb_Mut3	TMB3	Nb_Mut4	TMB4
diff --git a/test-op/dragon/run.sh b/test-op/dragon/run.sh
index 90cc0a8..586dcdd 100644
--- a/test-op/dragon/run.sh
+++ b/test-op/dragon/run.sh
@@ -18,10 +18,10 @@ do
 
     echo ${SAMPLE}
 
-    #awk -v sample=${SAMPLE} '$1==sample{print}' ${REC} > ${SAMPLE}_table_report_rec.tsv
+    awk -v sample=${SAMPLE} '$1==sample{print}' ${REC} > ${SAMPLE}_table_report_rec.tsv
     cmd="time python addRec.py -i ${VCF} -r ${SAMPLE}_table_report_rec.tsv -o ${SAMPLE}_rec.vcf"
     echo $cmd
-    #eval $cmd
+    eval $cmd
 
     IVCF="${SAMPLE}_rec.vcf"
     OFILE=$(basename $VCF | sed -e 's/.hg19_multianno.vcf//')
@@ -55,7 +55,7 @@ do
     tmb4=$(grep "TMB=" ${OFILE}_TMB4.txt | awk -F"=" '{print $2}')
     
     cmd="rm ${SAMPLE}_table_report_rec.tsv ${SAMPLE}_rec.vcf *.txt"
-    #eval $cmd
+    eval $cmd
     
     echo -e "${SAMPLE}\t${nbvar1}\t${tmb1}\t${nbvar2}\t${tmb2}\t${nbvar3}\t${tmb3}\t${nbvar3}\t${tmb3}" >> TMB_${RUN}_results.tsv
 done