-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmgf_splitter.py
88 lines (78 loc) · 3.75 KB
/
mgf_splitter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
## This tool is developed by Mr. Sandeep Kasargod and Mr. Chinmaya Narayana K.
## If you have any queries please contact [email protected] or [email protected]
## This tool is licensed under MIT License
## For more information please visit https://github.com/chinmayaNK22/MGF-file-splitter
import argparse
import os
# Command-line interface: one or more positional MGF paths, collected as a
# list in ``args.infile``.
parser = argparse.ArgumentParser(
    description='''Extract raw file specific spectra from an unassigned spectra file (.mgf) from Proteome Discoverer and generate new mgf file specific to raw files'''
)
parser.add_argument(
    'infile',
    metavar='-ip',
    type=str,
    nargs='+',
    help='MGF file path'
)
args = parser.parse_args()
# Spectra grouped by the source file name found in each TITLE line.
# Both dicts are filled by read_mgf() and share keys and ordering:
#   dicts_mz[file_name]      -> list of spectra, each a list of peak lines
#   dicts_mz_info[file_name] -> list of header strings, one per spectrum,
#                               "TITLE@PEPMASS@RTINSECONDS@CHARGE@SCANS"
dicts_mz = {}
dicts_mz_info = {}


def read_mgf(infile):
    """Parse an MGF file and group its spectra by originating file name.

    The file name is taken from the TITLE line: the text after the last
    backslash, up to the first ';', with double quotes removed.  Results
    are appended to the module-level ``dicts_mz`` and ``dicts_mz_info``
    dicts, so repeated calls accumulate.

    Args:
        infile: Path of the MGF file to read.

    Notes:
        * Header fields are recognised by the key before '=' at the start
          of the line, so a TITLE that merely contains e.g. "CHARGE" is
          not mistaken for a CHARGE line.
        * A header field that is absent from a spectrum is stored as an
          empty string.
        * A spectrum without a TITLE line cannot be assigned to a file
          and is skipped.
    """
    # Order here defines the field order of the '@'-joined header string.
    header_keys = ("TITLE", "PEPMASS", "RTINSECONDS", "CHARGE", "SCANS")

    # Initialised up front so a malformed file (END IONS before any
    # BEGIN IONS) cannot hit undefined names.
    header = dict.fromkeys(header_keys, "")
    file_name = None
    peaks = []

    with open(infile) as handle:
        for raw_line in handle:
            line = raw_line.rstrip()

            if line.startswith("BEGIN IONS"):
                # Fresh state for every spectrum, including the file name,
                # so nothing leaks over from the previous spectrum.
                header = dict.fromkeys(header_keys, "")
                file_name = None
                peaks = []
            elif line.startswith("END IONS"):
                if file_name is None:
                    # No TITLE seen: the source file is unknown.
                    continue
                dicts_mz.setdefault(file_name, []).append(peaks)
                dicts_mz_info.setdefault(file_name, []).append(
                    "@".join(header[key] for key in header_keys)
                )
            elif len(line) > 2 and line[0].isdigit():
                # Peak line ("m/z intensity ...").
                peaks.append(line)
            else:
                key = line.partition("=")[0]
                if key in header:
                    header[key] = line
                    if key == "TITLE":
                        # Use the stripped line so the name never carries
                        # a trailing newline when there is no ';'.
                        file_name = (
                            line.split('\\')[-1].split(';')[0].replace('"', '')
                        )
def _write_spectra(out_path, headers, peak_lists):
    """Write the given spectra, in order, to a new MGF file at out_path."""
    with open(out_path, 'w') as out:
        for header, peaks in zip(headers, peak_lists):
            out.write("BEGIN IONS" + '\n')
            # The header is "TITLE@PEPMASS@RTINSECONDS@CHARGE@SCANS".
            # Splitting from the right keeps an '@' inside the TITLE text
            # from shifting the remaining four fields.
            for field in header.rsplit('@', 4):
                out.write(field + '\n')
            for peak in peaks:
                out.write(peak + '\n')
            out.write("END IONS" + "\n")


def split_raw_file_spectra(mgf):
    """Split an MGF file into one MGF file per originating source file.

    Reads ``mgf`` with ``read_mgf`` and writes the grouped spectra into a
    folder named after the input path without its extension.  Spectra from
    ``<name>.raw`` go to ``<name>.mgf``; spectra from ``<name>.mgf`` keep
    that name.  Groups with any other extension are not written.

    Args:
        mgf: Path of the MGF file to split.

    Raises:
        OSError: If the output folder cannot be created and does not
            already exist as a directory.
    """
    read_mgf(mgf)

    # str.rstrip('.mgf') strips any trailing run of the characters
    # '.', 'm', 'g', 'f' rather than the suffix, which mangles names such
    # as "sample_gf.mgf"; splitext removes only the extension.
    out_dir = os.path.splitext(mgf)[0]
    try:
        os.makedirs(out_dir)
    except OSError:
        # Only an already-existing directory is acceptable; any other
        # failure (permissions, a plain file in the way) must surface.
        if not os.path.isdir(out_dir):
            raise
        print('Folder ' + out_dir + ' already present')

    for file_name, peak_lists in dicts_mz.items():
        extension = file_name.split('.')[-1]
        if extension == 'raw':
            out_name = os.path.splitext(file_name)[0] + '.mgf'
        elif extension == 'mgf':
            out_name = file_name
        else:
            continue
        _write_spectra(
            os.path.join(out_dir, out_name),
            dicts_mz_info[file_name],
            peak_lists,
        )
# Script entry: only the first path given on the command line is processed.
split_raw_file_spectra(mgf=args.infile[0])