From 7a91ebcbc191cafbd938cb449a6b5d4d104451a2 Mon Sep 17 00:00:00 2001 From: jonas-fuchs Date: Thu, 14 Nov 2024 17:40:34 +0100 Subject: [PATCH] enabled arrow format for genes --- README.md | 2 ++ virheat/__init__.py | 2 +- virheat/command.py | 8 ++++++- virheat/scripts/data_prep.py | 5 +++-- virheat/scripts/plotting.py | 41 +++++++++++++++++++++++++++++------- 5 files changed, 46 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index e7379dd..1ce66fb 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,8 @@ options: path to gff3 (needed if length is not provided) -a [gene ...], --gff3-annotations [gene ...] annotations to display from gff3 file (standard: gene). Multiple possible. + --gene-arrows, --no-gene-arrows + show genes in arrow format (only if the -g argument is provided) (default: False) -t 0, --threshold 0 display frequencies above this threshold (0-1) --delete, --no-delete delete mutations that are present in all samples and their maximum frequency divergence is smaller than 0.5 (default: True) diff --git a/virheat/__init__.py b/virheat/__init__.py index 249c695..3f3f99c 100644 --- a/virheat/__init__.py +++ b/virheat/__init__.py @@ -1,3 +1,3 @@ """plot vcf data as a heatmap mapped to a virus genome""" _program = "virheat" -__version__ = "0.7.2" +__version__ = "0.7.3" diff --git a/virheat/command.py b/virheat/command.py index dcb484a..39bd48f 100644 --- a/virheat/command.py +++ b/virheat/command.py @@ -74,6 +74,12 @@ def get_args(sysargs): default=["gene"], help="annotations to display from gff3 file (standard: gene). Multiple possible." ) + parser.add_argument( + "--gene-arrows", + action=argparse.BooleanOptionalAction, + default=False, + help="show genes as arrows" + ) parser.add_argument( "-t", "--threshold", @@ -243,7 +249,7 @@ def main(sysargs=sys.argv[1:]): cmap_genes = plt.get_cmap('tab20', len(genes_with_mutations)) colors_genes = [cmap_genes(i) for i in range(len(genes_with_mutations))] # plot gene track - plotting.create_gene_vis(ax, genes_with_mutations, n_mutations, y_size, n_tracks, start, stop, min_y_location, genome_y_location, colors_genes, n_scores) + plotting.create_gene_vis(ax, genes_with_mutations, n_mutations, y_size, n_tracks, start, stop, min_y_location, genome_y_location, colors_genes, n_scores, show_arrows=args.gene_arrows) # plot scores as track below the gene/genome track if args.scores: score_count = 1 diff --git a/virheat/scripts/data_prep.py b/virheat/scripts/data_prep.py index 65cfab8..b1d1650 100644 --- a/virheat/scripts/data_prep.py +++ b/virheat/scripts/data_prep.py @@ -316,6 +316,7 @@ def parse_gff3(file, reference): # add start, stop and strand gff3_dict[gff_values[2]][attribute_id]["start"] = int(gff_values[3]) gff3_dict[gff_values[2]][attribute_id]["stop"] = int(gff_values[4]) + gff3_dict[gff_values[2]][attribute_id]["strand"] = gff_values[6] gff3_file.close() @@ -350,7 +351,6 @@ def create_track_dict(unique_mutations, gff3_info, annotation_type): # find genes that have a mutation genes_with_mutations = set() - for mutation in unique_mutations: # get the mutation from string mutation = int(mutation.split("_")[0]) @@ -366,7 +366,8 @@ def create_track_dict(unique_mutations, gff3_info, annotation_type): genes_with_mutations.add( (attribute_name, gff3_info[type][annotation]["start"], - gff3_info[type][annotation]["stop"]) + gff3_info[type][annotation]["stop"], + gff3_info[type][annotation]["strand"]) ) if not genes_with_mutations: print("\033[31m\033[1mWARNING:\033[0m either the annotation types were not found in gff3 or the mutations are not within genes.") diff --git a/virheat/scripts/plotting.py b/virheat/scripts/plotting.py index 34b1fa9..c5605f8 100644 --- a/virheat/scripts/plotting.py +++ b/virheat/scripts/plotting.py @@ -210,13 +210,26 @@ def create_axis(ax, n_mutations, min_y_location, n_samples, file_names, start, s ax.spines["left"].set_visible(False) -def create_gene_vis(ax, genes_with_mutations, n_mutations, y_size, n_tracks, start, stop, min_y_location, genome_y_location, colors_genes, n_scores): +def create_gene_vis(ax, genes_with_mutations, n_mutations, y_size, n_tracks, start, stop, min_y_location, genome_y_location, colors_genes, n_scores, show_arrows = True): """ create the vis for the gene """ gene_annotations = [] mult_factor = n_mutations/(stop-start) + # define arrow head length for all arrows + # dependend on how long the relative genes are to each other. If they are similar in length, it is roughly one 8th + # of the shortest gene to display. + if show_arrows: + all_gene_lengths = [genes_with_mutations[x][0][1]-genes_with_mutations[x][0][0] for x in genes_with_mutations.keys()] + if min(all_gene_lengths)*20 < max(all_gene_lengths): + head_length = mult_factor*min(all_gene_lengths) + elif min(all_gene_lengths)*10 < max(all_gene_lengths): + head_length = mult_factor * min(all_gene_lengths) * 0.6 + elif min(all_gene_lengths)*5 < max(all_gene_lengths): + head_length = mult_factor * min(all_gene_lengths) * 0.3 + else: + head_length = mult_factor * min(all_gene_lengths) * 0.15 for idx, gene in enumerate(genes_with_mutations): # define the plotting values for the patch @@ -231,13 +244,25 @@ def create_gene_vis(ax, genes_with_mutations, n_mutations, y_size, n_tracks, sta width = mult_factor*(genes_with_mutations[gene][0][1]-start) - x_value height = genome_y_location/2 # plot the patch - ax.add_patch( - patches.FancyBboxPatch( - (x_value, y_value), width, height, - boxstyle="round,pad=-0.0040,rounding_size=0.03", - ec="black", fc=colors_genes[idx] - ) - ) + if not show_arrows: + ax.add_patch( + patches.FancyBboxPatch( + (x_value, y_value), width, height, + boxstyle="round,pad=-0.0040,rounding_size=0.03", + ec="black", fc=colors_genes[idx] + ) + ) + else: + if genes_with_mutations[gene][0][2] == '-': + x_value_arrow, arrow_width = x_value + width, -width + else: + x_value_arrow, arrow_width = x_value, width + ax.add_patch( + patches.FancyArrow( + x_value_arrow, y_value + height/2, dx=arrow_width,dy=0, width=height*0.7, head_width=height, head_length=head_length, length_includes_head=True, + ec="black", fc=colors_genes[idx] + ) + ) # define text pos for gene description inside or below the gene box, depending if it fits within if width > n_mutations/(y_size*8)*len(gene): gene_annotations.append(ax.text(x_value+width/2, y_value+height/2, gene, ha="center", va="center"))