From e248d2cec3a0dc8c0c291da6f19b2abdd211d80e Mon Sep 17 00:00:00 2001
From: Alison Tang <atang14@ucsc.edu>
Date: Mon, 7 Feb 2022 21:21:53 -0800
Subject: [PATCH] samjuncs update

---
 README.md       |  4 ++--
 bin/samJuncs.py | 17 +++++++++++------
 2 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/README.md b/README.md
index 6888ffe..a287d81 100644
--- a/README.md
+++ b/README.md
@@ -45,7 +45,7 @@ Alternatively, the user can align the reads themselves with their aligner of cho
 
 **Usage:**
 ```sh
-python flair.py align -g genome.fa -r <reads.fq>|<reads.fa> [options]
+python flair.py align -g genome.fa -r <reads.fq.gz>|<reads.fq>|<reads.fa> [options]
 ```
 run with `--help` for a description of optional arguments. Outputs (1) `sam` of raw aligned reads and (2) smoothed `bed12` file of aligned reads to be supplied to flair-correct.
 
@@ -74,7 +74,7 @@ Alternatively, the `-j` argument for flair-correct can also be generated using S
 ### <a name="collapse"></a>flair collapse
 Defines high-confidence isoforms from corrected reads. As FLAIR does not use annotations to collapse isoforms, FLAIR will pick the name of a read that shares the same splice junction chain as the isoform to be the isoform name. It is recommended to still provide an annotation with `-f`, which is used to rename FLAIR isoforms that match isoforms in existing annotation according to the transcript_id field in the gtf. Intermediate files generated by this step are removed by default, but can be retained for debugging purposes by supplying the argument `--keep_intermediate` and optionally supplying a directory to keep those files with `--temp_dir`.
 
-If there are multiple samples to be compared, the flair-corrected read `psl` files should be concatenated prior to running flair-collapse. In addition, all raw read fastq/fasta files should either be specified after `-r` with space/comma separators or concatenated into a single file.
+If there are multiple samples to be compared, the flair-corrected read `bed` or `psl` files should be concatenated prior to running flair-collapse. In addition, all raw read fastq/fasta files should either be specified after `-r` with space/comma separators or concatenated into a single file.
 
 **Usage:**
 ```sh
diff --git a/bin/samJuncs.py b/bin/samJuncs.py
index 986541f..9d1382c 100644
--- a/bin/samJuncs.py
+++ b/bin/samJuncs.py
@@ -73,7 +73,7 @@ class SAM(object):
     Handles sequence alignment format file input and output.
     '''
 
-    def __init__(self, inFile=None, isHISAT=False, fetch=''):
+    def __init__(self, inFile=None, isHISAT=False, fetch='', keep_supplementary=False):
         # Attributes
         self.inFile = inFile
         self.fetch = fetch
@@ -87,6 +87,7 @@ def __init__(self, inFile=None, isHISAT=False, fetch=''):
             sys.exit(1)
     
         self.strandInfo = {0:'+', 16:'-'}
+        self.supplementary_strandInfo = {2048:'+', 2064:'-'}
         #print(self.reader.find_introns((read for read in self.reader.fetch() if read.is_reverse)))
 
         #sys.exit(1)
@@ -96,6 +97,8 @@ def __init__(self, inFile=None, isHISAT=False, fetch=''):
         else:
             self.inferJuncStrand = self.inferMM2JuncStrand
 
+        self.keep_supplementary = keep_supplementary
+
     def inferMM2JuncStrand(self, read):
         # minimap gives junction strand denoted as 'ts'
         # the sign corresponds to the alignment orientation, where + agrees and - disagrees
@@ -162,14 +165,17 @@ def readJuncs(self):
         '''
         Returns start, end and junctions from a single read.
         '''
-
+        allskipped={}
         for read in self.reader.fetch():
 
             try:
                 # Skip unmapped or multimapped reads.
                 strand = self.strandInfo[read.flag]
             except:
-                continue
+                if self.keep_supplementary and read.flag in self.supplementary_strandInfo:
+                    strand = self.supplementary_strandInfo[read.flag]
+                else:
+                    continue
 
             qName = read.query_name
             chromosome = read.reference_name
@@ -189,7 +195,6 @@ def readJuncs(self):
             orientation = read.flag
 
             juncDir = self.inferJuncStrand(read)
-
             for num, flagTuple in enumerate(cigar,1):
                 flag, length = flagTuple 
                 if flag not in [0,2,3,7,8]:
@@ -201,6 +206,7 @@ def readJuncs(self):
                 refPos = refEnd+length
                 refEnd = refPos
 
+
             # Last is the end
             rend = refEnd
 
@@ -210,7 +216,6 @@ def readJuncs(self):
 def runCMD(x):
     fetch, alignType, bam = x
     bObj = SAM(bam, alignType, fetch)
-    print(bObj)
     return bObj.countJuncs()
 
 
@@ -238,7 +243,7 @@ def main():
 
     #results = p.imap_unordered(runCMD, tqdm(referenceIDs, desc="Parsing BAM for junctions", total=len(referenceIDs)))
     results = p.map(runCMD, referenceIDs)
-    print(results)
+
 
 
     # print(results[0])