-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbed6_to_gtf_TE_annotation.py
58 lines (44 loc) · 1.93 KB
/
bed6_to_gtf_TE_annotation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python3
# Author: Jeffrey Grover
# Purpose: Convert a .bed file with 6 columns into a gtf format for TE annotation
# Created: 09/2017
# This script accepts a nonstandard .bed file generated by a repeatmasker
# workflow in the format:
# chr start stop TE-family TE-type strand
# and outputs a gtf file for use with differential expression workflows
from argparse import ArgumentParser
def bed_to_gtf(input_file, anno_source):
    """Print a 6-column TE .bed file to stdout as tab-separated GTF lines.

    Every data line is split on whitespace and read as:
    chr, start, stop, TE-family, TE-type, strand. One GTF line with the
    feature type 'transposable_element' is printed per data line; the
    chromosome name is prefixed with 'Chr_' and the TE family is used for
    the gene_id, gene_name and transposable_element_family_ID attributes.

    Blank lines and lines whose first field starts with '#' or is 'track'
    or 'browser' are skipped instead of aborting the conversion.

    Args:
        input_file: Path of the .bed file to read.
        anno_source: Text for the GTF source column (column 2). ``None``
            is written as '.' rather than the literal text 'None'.

    Raises:
        IndexError: If a data line has fewer than 6 fields.
        ValueError: If the start or stop field is not an integer.
    """
    # A missing source is written as '.', the same placeholder this function
    # already emits for the score and frame columns.
    source = '.' if anno_source is None else anno_source

    with open(input_file, 'r') as input_handle:
        for line in input_handle:
            entry = line.split()

            # Blank lines (e.g. a trailing newline) and header/comment lines
            # carry no interval; indexing or int() on them would crash.
            if (not entry or entry[0].startswith('#')
                    or entry[0] in ('track', 'browser')):
                continue

            chrom = 'Chr_' + entry[0]
            start = int(entry[1]) + 1  # To convert to 1-based coordinates
            stop = int(entry[2])  # Carried over unchanged
            name_family, te_type, strand = entry[3], entry[4], entry[5]

            attributes = (
                'gene_id "%s"; gene_name "%s"; '
                'transposable_element_family_ID "%s"; '
                'transposable_element_type "%s";'
                % (name_family, name_family, name_family, te_type))

            print(chrom, source, 'transposable_element', start, stop, '.',
                  strand, '.', attributes, sep='\t')
# Parse command line options
def get_args():
    """Build the command-line interface and parse the process arguments.

    Returns:
        The argparse namespace with two attributes: ``input_path`` (the
        positional .bed file) and ``source`` (the value given to --source).
    """
    summary = (
        'Converts a .bed file with 6 columns to gtf. It requires that you know the '
        'feature type and source fields and input them using the appropriate '
        'command line option. This was created to convert a bed file for TE '
        'annotations, so use for other purposes will require modification.')

    cli = ArgumentParser(description=summary)
    cli.add_argument('input_path', metavar='FILE.bed', help='File to process')
    cli.add_argument('--source', type=str, help='Source of the annotation')

    options = cli.parse_args()
    return options
# Process the file
def main(args):
    """Run the .bed to GTF conversion for already-parsed arguments.

    Args:
        args: Object exposing ``input_path`` and ``source`` attributes,
            which are handed to ``bed_to_gtf`` in that order.
    """
    bed_path = args.input_path
    annotation_source = args.source
    bed_to_gtf(bed_path, annotation_source)
# Script entry point: parse the command line, then run the conversion.
if __name__ == '__main__':
    main(get_args())