-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathagglomerate.py
69 lines (62 loc) · 2.62 KB
/
agglomerate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import re
import sys
# Species shown in the table, one "abbreviation, Latin name" label per entry.
# The part after ", " is used as the lookup key when the FASTA headers are
# parsed; the whole label is what gets printed in the first column.
list_sp = [
    "Aa, Aquifex aeolicus",
    "Af, Archaeoglobus fulgidus",
    "Ap, Aeropyrum pernix",
    "Bb, Borrelia burgdorferi",
    "Bh, Bacillus halodurans",
    "Bs, Bacillus subtilis",
    "Cj, Campylobacter jejuni",
    "Cp, Chlamydia pneumoniae",
    "Ct, Chlamydia trachomatis",
    "Dr, Deinococcus radiodurans",
    # Was "c": the leading letter was missing. Every other abbreviation is
    # built from the genus/species initials, so "Ec" is restored here.
    "Ec, Escherichia coli",
    "Hp, Helicobacter pylori",
    "Mth, Methanothermobacter thermautotrophicus",
    "Mtu, Mycobacterium tuberculosis",
    "Nm, Neisseria meningitidis",
    "Pa, Pseudomonas aeruginosa",
    "Rp, Rickettsia prowazekii",
    "Ssp, Synechocystis sp",
    "Tm, Thermotoga maritima",
    "Tp, Treponema pallidum",
    "Vc, Vibrio cholerae",
    "Uu, Ureaplasma urealyticum",
    "Xf, Xylella fastidiosa",
]
# One record per species, keyed by the Latin name (the text after ", " in the
# label). "nom" keeps the full label; the "prot" and "dna" fields are added
# later while the FASTA files are read.
dico_out = {label.split(", ")[1]: {"nom": label} for label in list_sp}
# Protein pass: for every FASTA header line, find the species name it mentions
# and store the header's first token as that species' "prot" value.
# The pattern matches a capitalised word, one whitespace character, then a
# lowercase word of at least two letters; only the first match is used.
specie = re.compile(r'[A-Z][a-z]+\s[a-z]{2,}')
with open("/home/eliot/Documents/Travail/M1/Projets/ProjetOBIS1/Phe t-RNA/raw_protein_sequence.fasta",
          "r") as prot_file:
    for line in prot_file:
        if not line.startswith('>'):
            continue  # sequence data, not a header
        found = specie.search(line)
        if found is None or found.group(0) not in dico_out:
            # The old code indexed dico_out with the raw header (or with a
            # species missing from the table) and always died with KeyError.
            # Report on stderr so the table written to stdout stays clean.
            print("Skipping protein header without a known species: {}".format(line.strip()),
                  file=sys.stderr)
            continue
        latin = found.group(0)
        # Drop ">" and keep the text before the first space; underscores are
        # backslash-escaped because the value is printed into a LaTeX table row.
        dico_out[latin]["prot"] = line.replace(">", "").split(" ")[0].replace("_", "\\_")
# DNA pass: for every FASTA header line, find the species name it mentions and
# store the header text before the first ":" as that species' "dna" value.
# Same pattern as the protein pass; only the first match is used.
specie = re.compile(r'[A-Z][a-z]+\s[a-z]{2,}')
with open("/home/eliot/Documents/Travail/M1/Projets/ProjetOBIS1/Phe t-RNA/DNA_pheT.fasta", "r") as dna_file:
    for line in dna_file:
        if not line.startswith('>'):
            continue  # sequence data, not a header
        found = specie.search(line)
        if found is None:
            # Calling .group() on a missing match used to raise AttributeError
            # and abort the run; report on stderr and move on instead.
            print("Skipping DNA header without a species name: {}".format(line.strip()),
                  file=sys.stderr)
            continue
        record = dico_out.get(found.group(0))
        if record is None:
            # Species not in the table: skipped silently, as before, but
            # without a bare except that would hide unrelated errors.
            continue
        record["dna"] = line.replace(">", "").split(":")[0]
# Emit one LaTeX table row per species: label, DNA cell (italic), protein cell.
for record in dico_out.values():
    if "prot" in record:
        # Protein found: show the DNA value if there is one, else a placeholder.
        dna_cell = record["dna"] if "dna" in record else "Introuvable"
        prot_cell = record["prot"]
    else:
        # No protein: both cells are fixed italic labels, whatever "dna" holds.
        dna_cell = "Gene loss"
        prot_cell = "\\textit{Protein loss}"
    print("\t{} & \\textit{{{}}} & {} \\\\".format(record["nom"], dna_cell, prot_cell))