Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Core report functionality + documentation in README #28

Closed
wants to merge 12 commits into from
Closed
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,5 @@ static
node

slurm-*.out

.html
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ dashboard
start a web app that visualizes observed and expected mutation counts as a function of genomic coordinate
predict
call genomic regions predicted to be under negative selection [not yet implemented]
report
create .html plot displaying gene transcripts with introns compressed, observed and expected mutation counts, lollipops, tracks, and sequence coverage
```

Required arguments for `train` are:
Expand Down Expand Up @@ -93,6 +95,23 @@ Optionally, the user may change this by specifying the `--model` argument:
Path to a neutral model produced by the train sub-command (in json format). This model is used to compute the expected mutation counts in the visualization.
```

Required arguments for `report` are:

```
--config_file STR
Path to .yaml configuration file (example: reports/constraintview.yaml).
--output_file STR
File name of output plot.
```

By default, the `report` subcommand plots all transcripts plus flattened-exons.
Optionally, the user may change this by specifying the `--transcripts` argument:

```
--transcripts STR
Specifies which transcripts to plot. Can be 'all' for all transcripts or '['<transcript_name_1>', '<transcript_name_2>', ...]' for specific transcripts.
```

## Input Data

Assuming one has access to the protected environment on the CHPC at University of Utah,
Expand Down
4 changes: 3 additions & 1 deletion constraint-tools
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ export PYTHONPATH="${CONSTRAINT_TOOLS}/utilities:${CONSTRAINT_TOOLS}/predict-con
PATH="${CONSTRAINT_TOOLS}/bin:$PATH"
PATH="${CONSTRAINT_TOOLS}/train-model:$PATH"
PATH="${CONSTRAINT_TOOLS}/flask-app:${PATH}"
PATH="${CONSTRAINT_TOOLS}/report:${PATH}"

#######################################

tool="${1}"
info "\nTool: ${tool}\n"
shift
Expand All @@ -36,6 +36,8 @@ if [[ ${tool} == "train" ]]; then
train-model ${args}
elif [[ ${tool} == "dashboard" ]]; then
flask-app ${args}
elif [[ ${tool} == "report" ]]; then
constraintview ${args}
else
error "invalid tool: ${tool}"
exit 1
Expand Down
41 changes: 41 additions & 0 deletions report/axes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from bokeh.models import LinearAxis,Range1d

def add_axis(plot, plot_params, axis_label, y_max, min_tick, max_tick, tick_precision, tick_scientific_notation, y_min=0, num_ticks=3, axis_position='right', visible=True):

ticks = [(max_tick-min_tick)/(num_ticks+1)*i+min_tick for i in range(num_ticks+2)]
ticks = list(map(lambda x: int(x) if int(x)==x else x , ticks)) #have to do this for overrides to work on ticks ending in .0
ticker_dict = {tick:str((y_max-y_min)/(num_ticks+1)*(idx)+y_min) for idx,tick in enumerate(ticks)}
#tick formatting
ticker_dict = format_ticks(ticker_dict, tick_precision, tick_scientific_notation)
plot.extra_y_ranges[axis_label] = Range1d(0,plot_params['plot_height'])
axis = LinearAxis(y_range_name=axis_label, axis_label=axis_label, ticker=ticks, major_label_overrides=ticker_dict, visible=visible)
plot.add_layout(axis, axis_position)
return axis

def add_user_axis(plot, plot_params, user_line_params, axis_name, y_max, min_tick, max_tick, y_min=0, num_ticks=3, axis_position='right', visible=True):
axis_label = user_line_params[axis_name]['y_axis_label']
axis = add_axis(plot, plot_params, axis_label, y_max, min_tick, max_tick, user_line_params[axis_name]['tick_precision'], user_line_params[axis_name]['tick_scientific_notation'], y_min=y_min, num_ticks=num_ticks, axis_position=axis_position, visible=visible)
return axis

def add_variant_axis(plot_params, variant_params, plot, axis_label, allele_vals, visible=True):
y_min = min(allele_vals)
y_max = max(allele_vals)
min_tick = plot_params['y0']+variant_params['min_lollipop_height']
max_tick = plot_params['plot_height']-variant_params['lollipop_radius']-variant_params['lollipop_line_width']
tick_precision = 2 #TODO
tick_scientific_notation = False
num_ticks = 3
axis_position = 'left'
visible = visible
axis = add_axis(plot, plot_params, axis_label, y_max, min_tick, max_tick, tick_precision, tick_scientific_notation, y_min=y_min, num_ticks=num_ticks, axis_position=axis_position, visible=visible)
return axis


def format_ticks(ticker_dict, tick_precision, tick_scientific_notation):
for key in ticker_dict:
if int(float(ticker_dict[key])) == float(ticker_dict[key]): ticker_dict[key] = str(int(float(ticker_dict[key])))
for key in ticker_dict:
if tick_scientific_notation: format_precision = '.{}e'.format(tick_precision)
else: format_precision = '.{}f'.format(tick_precision)
ticker_dict = {key:format(float(value),format_precision) for (key,value) in ticker_dict.items()}
return ticker_dict
37 changes: 37 additions & 0 deletions report/colors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import numpy as np
import random

def lighten_hex_color(color, delta):
def limit(x):
if x > 255: return 255
if x < 0: return 0
return x

if '#' in color: color = color[1:]

r = hex(limit(int(color[:2], 16) + delta))[2:]
g = hex(limit(int(color[2:4], 16) + delta))[2:]
b = hex(limit(int(color[4:6], 16) + delta))[2:]

new_color = '#{}{}{}'.format(r, g, b)
return new_color

def color_variants(plot_params, variant_params, variant_ls):
#colors = [variant_params['variant_severity_colors'][v['severity']] for v in variant_ls]
for v in variant_ls:
#v['color'] = variant_params['variant_severity_colors'][v['severity']]
if v['severity']: v['color'] = variant_params['variant_severity_colors'][v['severity']]
else: v['color'] = plot_params['glyph_colors']['variant']


def color_boxes(plot_params, user_track_params, track_name, boxes):
IDs = np.unique([b['ID'] for b in boxes])
color_dict = {ID: random.choice(np.unique(plot_params['track_colors'])) for ID in IDs}

try:
for (ID, color) in user_track_params[track_name]['colors'].items():
color_dict[ID] = color
except: pass

for idx in range(len(boxes)):
boxes[idx]['color'] = color_dict[boxes[idx]['ID']]
19 changes: 19 additions & 0 deletions report/constraintview
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash

#config_file="constraintview.yaml"
transcripts="all"
output="plot.html"

while [[ "$1" =~ ^- ]]; do
case $1 in
--config_file ) shift; [[ ! $1 =~ ^- ]] && config_file=$1;;
--transcripts ) shift; [[ ! $1 =~ ^- ]] && transcripts=$1;;
--output_file ) shift; [[ ! $1 =~ ^- ]] && output_file=$1;;
*) error "$0: $1 is an invalid flag"; exit 1;;
esac
shift
done

cd ${CONSTRAINT_TOOLS}/report
python create_constraint_txt.py ${config_file}
python constraintview.py ${config_file} ${transcripts} -o ${output_file}
228 changes: 228 additions & 0 deletions report/constraintview.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
from process_gene_gff import gff_to_db, get_gene_feature, get_transcript_dict
from get_coords import get_variants,get_line,get_track
from map_coords import map_line
from colors import color_boxes, color_variants
from axes import add_user_axis,add_variant_axis
from glyphs import add_intron_glyph, add_exon_glyph, add_variant_glyph, add_UTR_glyph, add_track_glyph, add_multi_line_glyph
from widget_callbacks import add_checkbox,add_user_tracks_checkbox,add_user_lines_checkbox,add_smoothing_slider,add_legend,add_linear_log_scale,add_exon_zoom
import project_coords
import numpy as np
import argparse
from bokeh.plotting import figure, output_file, save
from bokeh.layouts import column, row, gridplot
from bokeh.models import ColumnDataSource, Range1d, HoverTool, LabelSet
import yaml
from yaml.loader import SafeLoader

def constraint_view_plot(plot_params, variant_params, user_line_params, transcript_dict, glyph_dict, axes, variant_ls, user_tracks, user_track_glyphs, user_lines, user_line_glyphs, title=''):

project_coords.adjust_coordinates(transcript_dict['exons'], intron_size=plot_params['intron_size'])
plot_height = plot_params['plot_height']
plot = figure(title=title, plot_width=1500, tools='tap,box_zoom,xpan,reset', plot_height=plot_height,min_border=0,#, toolbar_location=None,
x_range=Range1d(0, transcript_dict['exons'][-1]['compact_end']), y_range=Range1d(0,plot_height), background_fill_color='white')

plot.grid.grid_line_color = None
plot.toolbar.active_drag = None
plot.yaxis[0].visible = False

### VARIANTS ###
if variant_ls:
project_coords.map_point(variant_ls, transcript_dict['exons'])
ray_glyph,circle_glyph,allele_counts,allele_frequencies = add_variant_glyph(plot_params, variant_params, plot, variant_ls)
if ray_glyph and circle_glyph:
tooltips_variations = [('Position (compact)', '@x'), ('Position (chr)', '@pos'),
('Allele count', '@allele_counts'), ('Allele number', '@allele_numbers'), ('Allele frequency', '@allele_frequencies'),
('Change', '@ref > @alt'), ('VEP Annotation', '@ann'), ('Severity', '@sev')]
plot.add_tools(HoverTool(tooltips=tooltips_variations, renderers=[circle_glyph,ray_glyph], point_policy='follow_mouse', attachment='below'))
glyph_dict['Variant'].extend([ray_glyph,circle_glyph])

if variant_params['add_variant_axis']:
axes['count'].append(add_variant_axis(plot_params, variant_params, plot, 'Allele count', allele_counts, visible=True))
axes['allele_frequency'].append(add_variant_axis(plot_params, variant_params, plot, 'Allele frequency',allele_frequencies, visible=False))

else: variant_params['add_variant_axis'] = False

tooltips_features = [('Type','@feat_type'), ('Start (compact)', '@adj_start'), ('End (compact)', '@adj_end'),
('Start (chr)', '@true_start'), ('End (chr)', '@true_end'), ('Length', '@true_len')]

### INTRONS ###
introns = project_coords.get_introns_from_exons(transcript_dict['exons'])
introns_glyph = add_intron_glyph(plot_params, plot, introns)
plot.add_tools(HoverTool(tooltips=tooltips_features, renderers=[introns_glyph], point_policy='follow_mouse', attachment='below'))

### EXONS ###
exons_glyph,arrow_glyph = add_exon_glyph(plot_params, plot, transcript_dict['exons'], transcript_dict['direction'])
plot.add_tools(HoverTool(tooltips=tooltips_features, renderers=[exons_glyph], point_policy='follow_mouse', attachment='below'))

glyph_dict['Direction'].append(arrow_glyph)
glyph_dict['exon'].append(exons_glyph)

### UTRs ###
project_coords.map_box(transcript_dict['UTRs'], transcript_dict['exons'])
UTR_glyph = add_UTR_glyph(plot_params, plot, transcript_dict['UTRs'])
plot.add_tools(HoverTool(tooltips=tooltips_features, renderers=[UTR_glyph], point_policy='follow_mouse', attachment='below'))
glyph_dict['UTRs'].append(UTR_glyph)

### USER TRACKS ###
h = plot_params['track_height']

for idx,track_name in enumerate(user_tracks):
project_coords.map_box(user_tracks[track_name], transcript_dict['exons'])

tooltips_tracks = [('Name','@track_names'), ('Start (adjusted)', '@adj_start'), ('End (adjusted)', '@adj_end'),
('Start (true)', '@true_start'), ('End (true)', '@true_end'), ('Length', '@true_len')]

y = ((h*1.5)*(len(user_tracks) - idx - 1)+(h))
track_glyph = add_track_glyph(plot, user_tracks[track_name], h*0.9, y)
plot.add_tools(HoverTool(tooltips=tooltips_tracks, renderers=[track_glyph], point_policy='follow_mouse', attachment='below'))
user_track_glyphs[track_name].append(track_glyph)
cs = ColumnDataSource(dict(x=[0], y=[(h*1.5)*(len(user_tracks) - idx - 1)], text=[list(user_tracks.keys())[idx]]))
label = LabelSet(source=cs, x='x', y='y', text='text',text_font_size='{}px'.format(plot_params['track_height']),
text_align='left')
plot.add_layout(label)
user_track_glyphs[track_name].append(label)

### USER LINES ###
for idx,axis_name in enumerate(user_line_params):
all_xs = []
all_ys = []
for line in user_line_params[axis_name]['lines']:
xs_ls,ys_ls = map_line(user_lines[axis_name][line], transcript_dict['exons'])
all_xs.append(xs_ls)
all_ys.append(ys_ls)
y_max = max([item for sublist in list(np.array(all_ys,dtype=object).flat) for item in sublist])

for idx,line in enumerate(user_line_params[axis_name]['lines']):
line_params=user_line_params[axis_name]['lines'][line]
line_glyph = add_multi_line_glyph(plot_params, plot, all_xs[idx], all_ys[idx], max_y=y_max, y0=plot_params['y0'], fill_area=line_params['fill_area'], line_color=line_params['color'], line_alpha=line_params['alpha'])
line_glyph.level = 'underlay'
user_line_glyphs[axis_name].append(line_glyph)

add_user_axis(plot, plot_params, user_line_params, axis_name, y_max, plot_params['y0'], plot_params['plot_height'], y_min=0, num_ticks=3, axis_position='right', visible=True)

return plot,glyph_dict

def parse_args():
parser = argparse.ArgumentParser(description='constraintview')
parser.add_argument('config_file', help='must be .yaml; sample config file can be found at xyz')
parser.add_argument('transcripts', help='transcripts to plot, can be [... str] or all')
parser.add_argument('-o', '--output', required=False, help='output filepath')
args = parser.parse_args()

### CONFIG ###
config_file = args.config_file
# TODO what to do if config file doesn't have required params?
if config_file[-5:] != '.yaml': raise ValueError('configuration file must be .yaml')
with open(config_file) as f:
params = list(yaml.load_all(f, Loader=SafeLoader))
plot_params = params[0]
variant_params = params[1]
user_track_params = params[2]
user_line_params = params[3]
plot_params['transcript_height'] = 20 + len(user_track_params) * plot_params['track_height'] * 1.5
plot_params['y0'] = plot_params['transcript_height'] + plot_params['exon_height'] / 2 # "0" for line plots

### COLORS ###
with open('named_colors.yaml') as f:
named_colors = list(yaml.load_all(f, Loader=SafeLoader))[0]

### GLYPH COLORS ###
for glyph_type in plot_params['glyph_colors']:
color = plot_params['glyph_colors'][glyph_type]
if color[0] != "#": plot_params['glyph_colors'][glyph_type] = named_colors[color]

### TRACKS ###
for track_name in user_track_params:
track_db = gff_to_db(user_track_params[track_name]['gtf_path'], user_track_params[track_name]['gtf_path'] + '.db')
user_track_params[track_name]['db'] = track_db
with open('palettes.yaml') as f:
palettes = list(yaml.load_all(f, Loader=SafeLoader))[0]
track_colors = [named_colors[c] for c in palettes[plot_params['track_palette']]]
plot_params['track_colors'] = track_colors

### TRANSCRIPTS ###
transcript_IDs = args.transcripts
if transcript_IDs != 'all':
transcript_IDs = transcript_IDs.strip('[').strip(']').split(',')
transcript_IDs = [t.strip('\'') for t in transcript_IDs]

### OUTPUT ###
if args.output: output = args.output + '.html' if args.output[-5:] != '.html' else args.output
else: output = 'plot.html'
return plot_params,variant_params,user_track_params,user_line_params,transcript_IDs,output

def constraint_view():
plot_params, variant_params, user_track_params, user_line_params, transcript_IDs, output = parse_args()
# 'test.db' to make sure .db does not accidentally get overwritten during development
gff_db = gff_to_db(plot_params['gff_path'],plot_params['gff_path']+'test.db')

gene_feature = get_gene_feature(gff_db, plot_params['gene_name'])

if transcript_IDs == 'transcript_names':
transcripts = get_transcript_dict(plot_params, gff_db, gene_feature, 'all')
print(list(transcripts.keys()))
exit()

transcripts = get_transcript_dict(plot_params, gff_db, gene_feature, transcript_IDs)
transcript_IDs = list(transcripts.keys()) #if 'all', transcript_IDs will become list of transcript names; if nonexistent IDs they are removed

variant_ls = get_variants(plot_params['variant_path'], gene_feature.start, gene_feature.end, variant_params['seqid']) if variant_params['plot_variants'] else []
color_variants(plot_params, variant_params, variant_ls)
user_tracks = {track_name: get_track(user_track_params, track_name) for track_name in user_track_params}
for track_name in user_tracks: color_boxes(plot_params, user_track_params, track_name, user_tracks[track_name])
user_lines = {axis_name:{} for axis_name in user_line_params}
for axis_name in user_line_params:
for line_name in user_line_params[axis_name]['lines']:
user_lines[axis_name][line_name] = get_line(user_line_params[axis_name]['lines'][line_name]['filepath'])

plot_ls = []
glyph_dict = dict(exon=[],UTRs=[],Variant=[],Direction=[])
user_track_glyphs = {track_name:[] for track_name in user_track_params}
user_line_glyphs = {line_name:[] for line_name in user_line_params}
axes = dict(count=[],allele_frequency=[])
for line_name in user_line_params:
axes[line_name] = []

for idx,ID in enumerate(transcript_IDs):
title = 'gene={}; transcript={}/{}'.format(plot_params['gene_name'], ID, transcripts[ID]['ID'])
if transcripts[ID]['direction']: title += ' ({})'.format(transcripts[ID]['direction'])
plot,glyph_dict = constraint_view_plot(plot_params, variant_params, user_line_params, transcripts[ID], glyph_dict, axes, variant_ls, user_tracks, user_track_glyphs, user_lines, user_line_glyphs, title=title)
plot_ls.append(plot)

legend = add_legend(user_line_params)
add_exon_zoom(plot_ls,glyph_dict)
checkbox = add_checkbox(plot_ls,axes,glyph_dict,plot_params, variant_params)
user_tracks_checkbox = add_user_tracks_checkbox(plot_ls,axes,user_track_glyphs,glyph_dict['Direction'],plot_params)

user_line_checkboxes=[] #one checkbox per user line, so that they can be lined up with sliders
for axis in user_line_params:
user_line_checkbox = add_user_lines_checkbox(plot_ls, axes[axis], user_line_glyphs[axis], axis)
user_line_checkboxes.append(user_line_checkbox)

if variant_params['plot_variants']:
div_type,radio_group_type,div_scale,radio_group_scale = add_linear_log_scale(axes, glyph_dict)

sliders = []
for axis_name in user_line_params:
if user_line_params[axis_name]['smoothing_slider']:
fill_area_ls = [user_line_params[axis_name]['lines'][line]['fill_area'] for line in user_line_params[axis_name]['lines']]*len(plot_ls)
sliders.append(add_smoothing_slider(user_line_glyphs[axis_name], fill_area_ls, title='{} smoothing'.format(axis_name)))
else:
sliders.append(None)

if variant_params['plot_variants'] and variant_params['add_variant_axis']:
grid1 = [[checkbox, user_tracks_checkbox],[div_type, div_scale],[radio_group_type, radio_group_scale]]
else: grid1 = [[checkbox, user_tracks_checkbox]]

lines = list(zip(user_line_checkboxes,sliders))
for tup in lines: grid1.append(tup)
grid1.append([legend])
grid = gridplot(grid1, toolbar_location=None)

output_file(output)
save(column([grid]+plot_ls))

#t0 = time.time()
constraint_view()
#t1 = time.time()
#print(t1-t0)
Loading