diff --git a/umi_tools/group.py b/umi_tools/group.py index a821e537..39e8f537 100644 --- a/umi_tools/group.py +++ b/umi_tools/group.py @@ -79,6 +79,7 @@ import sys import collections import os +import itertools # required to make iteritems python2 and python3 compatible from builtins import dict @@ -182,7 +183,7 @@ def main(argv=None): if options.tsv: mapping_outfile = U.openFile(options.tsv, "w") mapping_outfile.write("%s\n" % "\t".join( - ["read_id", "contig", "position", "gene", "umi", "umi_count", + ["read_id", "contig", "position", "position2", "gene", "umi", "umi_count", "final_umi", "final_umi_count", "unique_id"])) nInput, nOutput, unique_id, input_reads, output_reads = 0, 0, 0, 0, 0 @@ -253,22 +254,32 @@ def main(argv=None): for umi in umi_group: reads = bundle[umi]['read'] - for read in reads: + reads2 = bundle[umi]['read2'] if options.paired else [None] + for read, read2 in itertools.izip(reads, reads2): if outfile: # Add the 'UG' tag to the read read.tags += [('UG', unique_id)] read.tags += [(options.umi_group_tag, top_umi)] outfile.write(read) + if read2 is not None: + read2.tags += [('UG', unique_id)] + read2.tags += [(options.umi_group_tag, top_umi)] + outfile.write(read2) if options.tsv: if options.per_gene: + # XXX: What if the two reads disagree? gene = read.get_tag(gene_tag) else: gene = "NA" + pos = umi_methods.get_read_position( + read, options.soft_clip_threshold)[1] + pos2 = umi_methods.get_read_position( + read2, options.soft_clip_threshold)[1] if read2 is not None else "" mapping_outfile.write("%s\n" % "\t".join(map(str, ( read.query_name, read.reference_name, - umi_methods.get_read_position( - read, options.soft_clip_threshold)[1], + pos, + pos2, gene, umi.decode(), counts[umi],