Skip to content

Commit

Permalink
subset tsv df
Browse files Browse the repository at this point in the history
  • Loading branch information
jonas-fuchs committed Aug 6, 2024
1 parent 4b1c49b commit 872caf5
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 2 deletions.
2 changes: 1 addition & 1 deletion virheat/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def main(sysargs=sys.argv[1:]):
frequency_array = data_prep.delete_n_mutations(frequency_array, unique_mutations, args.delete_n)

# annotate low coverage if per base coverage from qualimap was provided
data_prep.annotate_non_covered_regions(args.input[0], args.min_cov, frequency_array, file_names, unique_mutations)
data_prep.annotate_non_covered_regions(args.input[0], args.min_cov, frequency_array, file_names, unique_mutations, args.reference)

# define relative locations of all items in the plot
n_samples, n_mutations = len(frequency_array), len(frequency_array[0])
Expand Down
3 changes: 2 additions & 1 deletion virheat/scripts/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def create_freq_array(unique_mutations, frequency_lists):
return np.array(frequency_array)


def annotate_non_covered_regions(coverage_dir, min_coverage, frequency_array, file_names, unique_mutations):
def annotate_non_covered_regions(coverage_dir, min_coverage, frequency_array, file_names, unique_mutations, reference):
"""
Insert NaN values into the NumPy array if a position is not covered. Requires
per-base coverage TSV files created by bamqc
Expand All @@ -202,6 +202,7 @@ def annotate_non_covered_regions(coverage_dir, min_coverage, frequency_array, fi
continue
tsv_file = [file for file in per_base_coverage_files if os.path.splitext(os.path.basename(file))[0] == file_name][0]
coverage = pd.read_csv(tsv_file, sep="\t")
coverage = coverage[coverage["#chr"] == reference]
for j, (mutation, frequency) in enumerate(zip(unique_mutations, array)):
mut_pos = int(mutation.split("_")[0])
if coverage[coverage["pos"] == mut_pos].empty or all([frequency == 0, coverage[coverage["pos"] == mut_pos]["coverage"].iloc[0] <= min_coverage]):
Expand Down

0 comments on commit 872caf5

Please sign in to comment.