-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathshrimp_plot.py
103 lines (82 loc) · 3.99 KB
/
shrimp_plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
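#!/usr/bin/env python3
"""
Plot Phred quality scores against contig length per sample and reference.

Input
-----
- ``phred_files``: whitespace-separated list of comma-separated files, taken
  from the ``$phred_files`` placeholder when this file runs as ``.command.sh``.

Output
------
- ``<sample>_<reference>_phred.html``: one scatter plot per sample/reference
  pair, with one trace per assembler.
- ``phred.json``: the same figures serialised as JSON, keyed by sample and
  then by reference.
"""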
import os
import pandas as pd
import json
import plotly.graph_objs as go
from plotly.offline import plot
try:
import utils
except ImportError:
from templates import utils
# Template metadata.
__version__ = "0.0.1"
__build__ = "15.12.2020"
__template__ = "PROCESS_SHRIMP-nf"

# Module-wide logger, obtained from the project's ``utils`` helper.
logger = utils.get_logger(__file__)

# When this file is executed as ``.command.sh`` the ``$phred_files``
# placeholder is expected to have been replaced by a whitespace-separated
# list of input paths. PHRED_FILES is only defined in that case.
if __file__.endswith(".command.sh"):
    PHRED_FILES = '$phred_files'.split()
    logger.debug(
        "Running {} with parameters:".format(os.path.basename(__file__))
    )
    logger.debug("PHRED_FILES: {}".format(PHRED_FILES))
def main(phred_files):
    """
    Plot Phred quality score against contig length and write the report data.

    Every input file is read as comma-separated text whose first line is a
    header. Fields 1 to 5 of each data line are taken as assembler, reference,
    contig, contig length and Phred quality score; field 0 is ignored. Blank
    lines are skipped and an empty file contributes no rows.

    For every sample/reference combination a scatter plot with one trace per
    assembler is saved as ``<sample>_<reference>_phred.html`` (spaces in the
    reference replaced by underscores). The JSON form of every figure is
    collected in ``phred.json`` as
    ``{sample: {"PlotData": {reference: [figure_json]}}}``.

    :param phred_files: iterable of paths to the Phred score files. The sample
        name is the part of each file's base name before the first underscore.
    :return: None. Output is written to the current working directory.
    """
    columns = ['Sample', 'Assembler', 'Reference', 'Contig', 'Contig Length',
               'Phred Quality Score']

    # Collect the rows first and build the DataFrame once: DataFrame.append
    # was removed in pandas 2.0 and copied the whole frame on every call.
    rows = []
    for file_phred in phred_files:
        logger.debug("Reading Phred scores from: {}".format(file_phred))
        sample_name = os.path.basename(file_phred).split('_')[0]
        with open(file_phred) as fh:
            next(fh, None)  # skip header line; an empty file is not an error
            for line in fh:
                if not line.strip():
                    continue  # tolerate blank lines, e.g. at the end of file
                # Drop the line terminator so it does not end up in the last
                # parsed field.
                fields = line.rstrip('\r\n').split(',')
                # Values are kept as text, as they appear in the file.
                # NOTE(review): contig length and score look numeric - confirm
                # whether they should be converted before plotting.
                rows.append({'Sample': sample_name,
                             'Assembler': fields[1],
                             'Reference': fields[2],
                             'Contig': fields[3],
                             'Contig Length': fields[4],
                             'Phred Quality Score': fields[5]})

    df_phred = pd.DataFrame(rows, columns=columns)

    # These do not change while plotting, so compute them once.
    samples = sorted(df_phred['Sample'].unique())
    references = sorted(df_phred['Reference'].unique())
    assemblers = sorted(df_phred['Assembler'].unique(), key=str.upper)

    # Create plot
    report_dict = {}
    for sample in samples:
        logger.debug("Plotting sample: {}".format(sample))
        in_sample = df_phred['Sample'] == sample

        for reference in references:
            in_reference = in_sample & (df_phred['Reference'] == reference)
            fig_phred = go.Figure()

            # The colour index follows the assembler's position in the global
            # sorted list, so an assembler keeps its colour in every plot.
            for i, assembler in enumerate(assemblers):
                selected = df_phred[in_reference & (df_phred['Assembler'] == assembler)]
                # Wrap around instead of failing when there are more
                # assemblers than colours (assumes utils.COLOURS is a
                # non-empty sequence).
                colour = utils.COLOURS[i % len(utils.COLOURS)]
                fig_phred.add_trace(go.Scatter(
                    y=selected['Phred Quality Score'],
                    x=selected['Contig Length'],
                    name=assembler,
                    opacity=0.7,
                    mode='markers',
                    marker=dict(color=colour, size=12,
                                line=dict(width=1, color='black'))))

            fig_phred.update_layout(xaxis_title="Contig size",
                                    yaxis_title="Score",
                                    plot_bgcolor='rgb(255,255,255)',
                                    xaxis=dict(showline=True, zeroline=False, linewidth=1,
                                               linecolor='black', gridcolor='#DCDCDC'))

            plot(fig_phred,
                 filename='{0}_{1}_phred.html'.format(sample, reference.replace(' ', '_')),
                 auto_open=False)

            plot_species = fig_phred.to_json()
            plot_data = report_dict.setdefault(sample, {"PlotData": {}})["PlotData"]
            plot_data.setdefault(reference, []).append(plot_species)

    with open("phred.json", "w") as json_report:
        json_report.write(json.dumps(report_dict, separators=(",", ":")))
# Script entry point: run the plots for the files listed in PHRED_FILES.
if __name__ == "__main__":
    main(PHRED_FILES)