Skip to content

Commit

Permalink
smaller changes
Browse files Browse the repository at this point in the history
  • Loading branch information
jonas-fuchs committed Aug 6, 2024
1 parent a81daa6 commit d780d2b
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 13 deletions.
2 changes: 1 addition & 1 deletion example_mave_data/MaveBindRBD.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
accession,hgvs_nt,hgvs_splice,hgvs_pro,score,average,library,AA
exaccession,hgvs_nt,hgvs_splice,hgvs_pro,score,average,library,AA
urn:mavedb:00000044-a-1#7954,NA,NA,p.Lys199Val,0.09,0.07,2,K529V
urn:mavedb:00000044-a-1#7955,NA,NA,p.Lys199Trp,-0.02,0,1,K529W
urn:mavedb:00000044-a-1#7956,NA,NA,p.Lys199Trp,0.01,0,2,K529W
Expand Down
2 changes: 1 addition & 1 deletion virheat/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""plot vcf data as a heatmap mapped to a virus genome"""
_program = "virheat"
__version__ = "0.7"
__version__ = "0.7.1"
7 changes: 4 additions & 3 deletions virheat/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ def get_args(sysargs):
"-r",
"--reference",
type=str,
metavar="None",
metavar="ref_id",
required=True,
default=None,
help="reference identifier"
)
Expand Down Expand Up @@ -155,7 +156,7 @@ def main(sysargs=sys.argv[1:]):
sys.exit("\033[31m\033[1mERROR:\033[0m No VCF files provided")
else:
if args.scores:
frequency_lists, unique_mutations, file_names = data_prep.extract_vcf_data(vcf_files, threshold=args.threshold, scores=True)
frequency_lists, unique_mutations, file_names = data_prep.extract_vcf_data(vcf_files, args.reference, threshold=args.threshold, scores=True)
n_scores = len(args.scores)
else:
frequency_lists, unique_mutations, file_names = data_prep.extract_vcf_data(vcf_files, args.reference, threshold=args.threshold)
Expand Down Expand Up @@ -186,7 +187,7 @@ def main(sysargs=sys.argv[1:]):
if args.gff3_path is not None and args.genome_length is not None:
sys.exit("\033[31m\033[1mERROR:\033[0m Do not provide the -g and -l argument simultaneously!")
elif args.gff3_path is not None:
gff3_info, gff3_ref_name = data_prep.parse_gff3(args.gff3_path, args.reference)
gff3_info = data_prep.parse_gff3(args.gff3_path, args.reference)
genome_end = data_prep.get_genome_end(gff3_info)
genes_with_mutations, n_tracks = data_prep.create_track_dict(unique_mutations, gff3_info, args.gff3_annotations)
elif args.genome_length is not None:
Expand Down
13 changes: 5 additions & 8 deletions virheat/scripts/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@ def read_vcf(vcf_file, reference):
header = header_lines[0]
# get each line as frequency_lists
with open(vcf_file, "r") as f:
lines = [l.split("\t") for l in f if not l.startswith('#') and l.startswith(reference)]
lines = [l.split("\t") for l in f if l.startswith(reference)]
# check if vcf is empty
if not lines:
print(f"\033[31m\033[1mWARNING:\033[0m {vcf_file} has no variants with {reference} as #CHROM!")
print(f"\033[31m\033[1mWARNING:\033[0m {vcf_file} has no variants to {reference}!")
# get standard headers as keys
for key in header[0:6]:
vcf_dict[key] = []
Expand Down Expand Up @@ -139,7 +139,7 @@ def extract_vcf_data(vcf_files, reference, threshold=0, scores=False):
{x[0] for li in frequency_lists for x in li}, key=lambda x: int(x.split("_")[0])
)
if not unique_mutations:
sys.exit(f"\033[31m\033[1mERROR:\033[0m No unique mutations to {reference} found in all vcf files!")
sys.exit(f"\033[31m\033[1mERROR:\033[0m No variants to {reference} in all vcf files!")

return frequency_lists, unique_mutations, file_names

Expand Down Expand Up @@ -279,12 +279,9 @@ def parse_gff3(file, reference):
with open(file, "r") as gff3_file:
for line in gff3_file:
# ignore comments and last line
if line.startswith("#") or line == "\n":
if not line.startswith(reference):
continue
gff_values = line.split("\t")
if gff_values[0] != reference:
continue
gff3_ref_name = gff_values[0]
# create keys
if gff_values[2] not in gff3_dict:
gff3_dict[gff_values[2]] = {}
Expand All @@ -307,7 +304,7 @@ def parse_gff3(file, reference):
if not gff3_dict:
sys.exit(f"\033[31m\033[1mERROR:\033[0m {reference} not found in gff3 file.")

return gff3_dict, gff3_ref_name
return gff3_dict


def get_genome_end(gff3_dict):
Expand Down

0 comments on commit d780d2b

Please sign in to comment.