Skip to content

Commit

Permalink
samjuncs update
Browse files Browse the repository at this point in the history
  • Loading branch information
belgravia committed Feb 8, 2022
1 parent 3d71f56 commit e248d2c
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 8 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ Alternatively, the user can align the reads themselves with their aligner of cho

**Usage:**
```sh
python flair.py align -g genome.fa -r <reads.fq>|<reads.fa> [options]
python flair.py align -g genome.fa -r <reads.fq.gz>|<reads.fq>|<reads.fa> [options]
```
run with `--help` for a description of optional arguments. Outputs (1) `sam` of raw aligned reads and (2) smoothed `bed12` file of aligned reads to be supplied to flair-correct.

Expand Down Expand Up @@ -74,7 +74,7 @@ Alternatively, the `-j` argument for flair-correct can also be generated using S
### <a name="collapse"></a>flair collapse
Defines high-confidence isoforms from corrected reads. As FLAIR does not use annotations to collapse isoforms, FLAIR will pick the name of a read that shares the same splice junction chain as the isoform to be the isoform name. It is recommended to still provide an annotation with `-f`, which is used to rename FLAIR isoforms that match isoforms in existing annotation according to the transcript_id field in the gtf. Intermediate files generated by this step are removed by default, but can be retained for debugging purposes by supplying the argument `--keep_intermediate` and optionally supplying a directory to keep those files with `--temp_dir`.

If there are multiple samples to be compared, the flair-corrected read `psl` files should be concatenated prior to running flair-collapse. In addition, all raw read fastq/fasta files should either be specified after `-r` with space/comma separators or concatenated into a single file.
If there are multiple samples to be compared, the flair-corrected read `bed` or `psl` files should be concatenated prior to running flair-collapse. In addition, all raw read fastq/fasta files should either be specified after `-r` with space/comma separators or concatenated into a single file.

**Usage:**
```sh
Expand Down
17 changes: 11 additions & 6 deletions bin/samJuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class SAM(object):
Handles sequence alignment format file input and output.
'''

def __init__(self, inFile=None, isHISAT=False, fetch=''):
def __init__(self, inFile=None, isHISAT=False, fetch='', keep_supplementary=False):
# Attributes
self.inFile = inFile
self.fetch = fetch
Expand All @@ -87,6 +87,7 @@ def __init__(self, inFile=None, isHISAT=False, fetch=''):
sys.exit(1)

self.strandInfo = {0:'+', 16:'-'}
self.supplementary_strandInfo = {2048:'+', 2064:'-'}
#print(self.reader.find_introns((read for read in self.reader.fetch() if read.is_reverse)))

#sys.exit(1)
Expand All @@ -96,6 +97,8 @@ def __init__(self, inFile=None, isHISAT=False, fetch=''):
else:
self.inferJuncStrand = self.inferMM2JuncStrand

self.keep_supplementary = keep_supplementary

def inferMM2JuncStrand(self, read):
# minimap gives junction strand denoted as 'ts'
# the sign corresponds to the alignment orientation, where + agrees and - disagrees
Expand Down Expand Up @@ -162,14 +165,17 @@ def readJuncs(self):
'''
Returns start, end and junctions from a single read.
'''

allskipped={}
for read in self.reader.fetch():

try:
# Skip unmapped or multimapped reads.
strand = self.strandInfo[read.flag]
except:
continue
if self.keep_supplementary and read.flag in self.supplementary_strandInfo:
strand = self.supplementary_strandInfo[read.flag]
else:
continue

qName = read.query_name
chromosome = read.reference_name
Expand All @@ -189,7 +195,6 @@ def readJuncs(self):
orientation = read.flag

juncDir = self.inferJuncStrand(read)

for num, flagTuple in enumerate(cigar,1):
flag, length = flagTuple
if flag not in [0,2,3,7,8]:
Expand All @@ -201,6 +206,7 @@ def readJuncs(self):
refPos = refEnd+length
refEnd = refPos


# Last is the end
rend = refEnd

Expand All @@ -210,7 +216,6 @@ def readJuncs(self):
def runCMD(x):
fetch, alignType, bam = x
bObj = SAM(bam, alignType, fetch)
print(bObj)
return bObj.countJuncs()


Expand Down Expand Up @@ -238,7 +243,7 @@ def main():

#results = p.imap_unordered(runCMD, tqdm(referenceIDs, desc="Parsing BAM for junctions", total=len(referenceIDs)))
results = p.map(runCMD, referenceIDs)
print(results)



# print(results[0])
Expand Down

0 comments on commit e248d2c

Please sign in to comment.