Skip to content

Commit

Permalink
fixed location issues
Browse files Browse the repository at this point in the history
  • Loading branch information
jonas-fuchs committed May 15, 2024
1 parent 36fa028 commit 5fbaaa2
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 59 deletions.
2 changes: 1 addition & 1 deletion example_data/example_mave_data/input/MaveExpRBD.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
accession,hgvs_nt,hgvs_splice,hgvs_pro,score,average,library,AA
vi-s-accession,hgvs_nt,hgvs_splice,hgvs_pro,score,average,library,AA
urn:mavedb:00000044-b-2#4199,NA,NA,p.Ala105*,-4.58,-4.57,1,A435
urn:mavedb:00000044-b-2#4200,NA,NA,p.Ala105*,-4.55,-4.57,2,A435
urn:mavedb:00000044-b-2#4187,NA,NA,p.Ala105Arg,-2.86,-2.82,1,A435R
Expand Down
84 changes: 40 additions & 44 deletions virheat/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,10 @@ def main(sysargs=sys.argv[1:]):
vcf_files = sorted(vcf_files, key=lambda x: data_prep.get_digit_and_alpha(os.path.basename(x)))

# extract vcf info
n_scores = 0
if args.scores:
reference_name, frequency_lists, unique_mutations, file_names = data_prep.extract_vcf_data(vcf_files, threshold=args.threshold, scores=1)
n_scores = len(args.scores)
else:
reference_name, frequency_lists, unique_mutations, file_names = data_prep.extract_vcf_data(vcf_files, threshold=args.threshold)
if args.zoom:
Expand All @@ -156,7 +158,7 @@ def main(sysargs=sys.argv[1:]):
if args.delete_n is not None:
frequency_array = data_prep.delete_n_mutations(frequency_array, unique_mutations, args.delete_n)

# annotate low coverage if per base coveage from qualimap was provided
# annotate low coverage if per base coverage from qualimap was provided
data_prep.annotate_non_covered_regions(args.input[0], args.min_cov, frequency_array, file_names, unique_mutations)

# define relative locations of all items in the plot
Expand All @@ -168,7 +170,7 @@ def main(sysargs=sys.argv[1:]):
else:
genome_y_location = math.log2(n_samples)

# gff3 data extraction
# gff3 data extraction and define y coordinates
if args.gff3_path is not None and args.genome_length is not None:
sys.exit("\033[31m\033[1mERROR:\033[0m Do not provide the -g and -l argument simultaneously!")
elif args.gff3_path is not None:
Expand All @@ -178,29 +180,24 @@ def main(sysargs=sys.argv[1:]):
print("\033[31m\033[1mWARNING:\033[0m gff3 reference does not match the vcf reference!")
genome_end = data_prep.get_genome_end(gff3_info)
genes_with_mutations, n_tracks = data_prep.create_track_dict(unique_mutations, gff3_info, args.gff3_annotations)
# define space for the genome vis tracks
if args.scores and n_samples >= 4:
min_y_location = genome_y_location + genome_y_location/2 * (n_tracks+1) + 1.7*len(args.scores)
else:
min_y_location = genome_y_location + genome_y_location/2 * (n_tracks+1)
elif args.genome_length is not None:
genome_end = args.genome_length
if args.scores:
if n_samples < 4:
min_y_location = genome_y_location + 2 + len(args.scores) * 0.3
else:
min_y_location = genome_y_location + math.log2(n_samples) + 1.7 * len(args.scores)
else:
min_y_location = genome_y_location
n_tracks = 0
else:
sys.exit("\033[31m\033[1mERROR:\033[0m Provide either a gff3 file (-g) or the length (-l) of the genome which you used for mapping")

# define min y coordinate
if n_tracks != 0 or n_scores != 0:
min_y_location = genome_y_location + genome_y_location / 2 * (n_tracks + n_scores + 1)
else:
min_y_location = genome_y_location

# define size of the plot
y_size = n_mutations*0.4
if args.scores:
x_size = y_size * (n_samples + min_y_location + len(args.scores)) / n_mutations
else:
x_size = y_size*(n_samples+min_y_location)/n_mutations
x_size = y_size*(n_samples + min_y_location)/n_mutations
x_size = x_size-x_size*0.15 # compensate of heatmap annotation

# ini the fig
Expand All @@ -217,45 +214,46 @@ def main(sysargs=sys.argv[1:]):
else:
start, stop = 0, genome_end

# plot all elements
# plot all common elements
cmap = cm.gist_heat_r
cmap.set_bad('silver', 1.)
cmap_cells = cm.ScalarMappable(norm=colors.Normalize(0, 1), cmap=cmap)
plotting.create_heatmap(ax, frequency_array, cmap_cells)
mutation_set = plotting.create_genome_vis(ax, genome_y_location, n_mutations, unique_mutations, start, stop)
plotting.create_axis(ax, n_mutations, min_y_location, n_samples, file_names, start, stop, genome_y_location,
unique_mutations, reference_name, n_scores)
plotting.create_mutation_legend(mutation_set, min_y_location, n_samples, n_scores)
plotting.create_colorbar(args.threshold, cmap_cells, min_y_location, n_samples, ax, n_scores)
# plot scores as track below the genome track
if args.scores:
scores_files = args.scores
n_scoresets = len(scores_files)
score_count = 0
plotting.create_axis(ax, n_mutations, min_y_location, n_samples, file_names, start, stop, genome_y_location, unique_mutations, reference_name, n_scoresets)
plotting.create_mutation_legend(mutation_set, min_y_location, n_samples, n_scoresets)
score_count = 1
for score_params in args.scores:
score_count += 1
scores_file, pos_col, score_col, score_name = score_params
unique_scores = data_prep.extract_scores(unique_mutations, scores_file, pos_col, score_col)
score_set = plotting.create_scores_vis(ax, genome_y_location, n_mutations, unique_scores, start, stop, score_name=score_name, score_count=score_count)
cmap_scores = plt.cm.get_cmap('coolwarm') # blue to red colormap
if n_scoresets<3:
plotting.create_scores_cbar(cmap_scores, min_y_location, n_scoresets, score_set, score_name, score_count, ax)
plotting.create_scores_vis(ax, genome_y_location, n_mutations, n_tracks, unique_scores, start, stop, score_name=score_name, score_count=score_count)



#cmap_scores = plt.cm.get_cmap('coolwarm') # blue to red colormap
#if n_scoresets < 3:
# plotting.create_scores_cbar(cmap_scores, min_y_location, n_scoresets, score_set, score_name, score_count, ax)

# plotting colorbars for scorsets if more than 3 scoresets - move colorbars under
if n_scoresets > 2:
ax2 = fig.add_axes(ax.get_position(), frameon=False)
ax2.set_position([ax.get_position().x0, ax.get_position().y0 - 1.1 * ax.get_position().height*0.5,
ax.get_position().width, 0.4])
ax2.set_xticks([])
ax2.set_yticks([])
score_count = 0
for score_params in args.scores:
score_count += 1
scores_file, pos_col, score_col, score_name = score_params
unique_scores = data_prep.extract_scores(unique_mutations, scores_file, pos_col, score_col)
score_set = plotting.create_scores_vis(ax, genome_y_location, n_mutations, unique_scores, start, stop, score_name=score_name, score_count=score_count, no_plot=1)
cmap_scores = plt.cm.get_cmap('coolwarm')
plotting.create_scores_cbar(cmap_scores, min_y_location, n_scoresets, score_set, score_name, score_count, ax2)
else:
plotting.create_axis(ax, n_mutations, min_y_location, n_samples, file_names, start, stop, genome_y_location, unique_mutations, reference_name)
plotting.create_mutation_legend(mutation_set, min_y_location, n_samples)
# if n_scoresets > 2:
# ax2 = fig.add_axes(ax.get_position(), frameon=False)
# ax2.set_position([ax.get_position().x0, ax.get_position().y0 - 1.1 * ax.get_position().height*0.5,
# ax.get_position().width, 0.4])
# ax2.set_xticks([])
# ax2.set_yticks([])
# score_count = 0
# for score_params in args.scores:
# score_count += 1
# scores_file, pos_col, score_col, score_name = score_params
# unique_scores = data_prep.extract_scores(unique_mutations, scores_file, pos_col, score_col)
# score_set = plotting.create_scores_vis(ax, genome_y_location, n_mutations, unique_scores, start, stop, score_name=score_name, score_count=score_count, no_plot=1)
# cmap_scores = plt.cm.get_cmap('coolwarm')
# #plotting.create_scores_cbar(cmap_scores, min_y_location, n_scoresets, score_set, score_name, score_count, ax2)

if args.gff3_path is not None:
if genes_with_mutations:
Expand All @@ -269,8 +267,6 @@ def main(sysargs=sys.argv[1:]):
else:
plotting.create_gene_vis(ax, genes_with_mutations, n_mutations, y_size, n_tracks, start, stop, min_y_location, genome_y_location, colors_genes)

plotting.create_colorbar(args.threshold, cmap_cells, min_y_location, n_samples, ax)

# create output folder
if not os.path.exists(args.input[1]):
os.makedirs(args.input[1])
Expand Down
29 changes: 15 additions & 14 deletions virheat/scripts/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,16 @@ def create_genome_vis(ax, genome_y_location, n_mutations, unique_mutations, star
return mutation_set


def create_scores_vis(ax, genome_y_location, n_mutations, unique_mutations, start, stop, score_name=None, score_count=None, no_plot=0):
def create_scores_vis(ax, genome_y_location, n_mutations, n_tracks, unique_mutations, start, stop, score_name=None, score_count=None):
"""
create the scores rectangles, mappings to the reference
"""
y_min = -genome_y_location
y_max = -genome_y_location+genome_y_location/2
score_set = []

y_min = -genome_y_location - (n_tracks+1)*(genome_y_location/2) - score_count*(genome_y_location/2)
y_max = y_min + genome_y_location / 2

if score_name:
score_set = []
for mutation in unique_mutations:
mutation_attributes = mutation.split("_")
score_set.append(float(mutation_attributes[5]))
Expand All @@ -88,11 +90,11 @@ def create_scores_vis(ax, genome_y_location, n_mutations, unique_mutations, star
else:
print("\033[31m\033[1mERROR:\033[0m Seems like there are no scores in the score set '{}' corresponding to the plotted mutation positions.".format(score_name))
cmap = plt.cm.get_cmap('coolwarm') # blue to red colormap
ax.text(-0.02 * n_mutations, y_min - (score_count + 1.5) * (genome_y_location / 2), score_name, ha='right', va='center')
ax.text(-0.02 * n_mutations, y_min+genome_y_location/4, score_name, ha='right', va='center')

# create a rectangle for the scores
rect = patches.FancyBboxPatch(
(0, y_min - (score_count + 2) * (genome_y_location / 2)), n_mutations*1.5, y_max - y_min,
(0, y_min), n_mutations, y_max - y_min,
boxstyle="round,pad=-0.0040,rounding_size=0.03",
ec="lightgray", fc=(0, 0, 0, 0.05)
)
Expand All @@ -106,12 +108,11 @@ def create_scores_vis(ax, genome_y_location, n_mutations, unique_mutations, star
mutation_x_location = n_mutations/length*(int(mutation_attributes[0])-start)
x_start += 1
# create lines for score_set
if score_name and no_plot !=1:
if score_set:
score_value = float(mutation_attributes[5])
if score_value in score_set:
color = cmap(norm(score_value)) # map score to colormap
plt.vlines(x=mutation_x_location, ymin=y_min - (score_count + 2) * (genome_y_location / 2), ymax=y_max - (score_count + 2) * (genome_y_location / 2), color=color, linestyle='-')
if score_name and score_set:
score_value = float(mutation_attributes[5])
if score_value in score_set:
color = cmap(norm(score_value)) # map score to colormap
plt.vlines(x=mutation_x_location, ymin=y_min, ymax=y_max, color=color, linestyle='-')

return score_set

Expand All @@ -135,7 +136,7 @@ def create_scores_cbar(cmap, min_y_location, n_scoresets, score_set, score_name,
cbar.set_label('\n'.join(textwrap.wrap(score_name, 20)), size='x-small')


def create_colorbar(threshold, cmap, min_y_location, n_samples, ax):
def create_colorbar(threshold, cmap, min_y_location, n_samples, ax, n_scores=0):
"""
creates a custom colorbar and annotates the threshold
"""
Expand All @@ -159,7 +160,7 @@ def create_colorbar(threshold, cmap, min_y_location, n_samples, ax):
labels.remove(rounded_threshold)
ticks.append(threshold)
labels.append(f"threshold\n={threshold}")
cbar = plt.colorbar(cmap, label="variant frequency", pad=0, shrink=n_samples/(min_y_location+n_samples), anchor=(0.1,1), aspect=15, ax=ax)
cbar = plt.colorbar(cmap, label="variant frequency", pad=0, shrink=n_samples/(min_y_location+n_samples+n_scores), anchor=(0.1,1), aspect=15, ax=ax)
cbar.set_ticks(ticks)
cbar.set_ticklabels(labels)

Expand Down

0 comments on commit 5fbaaa2

Please sign in to comment.