Skip to content

Commit

Permalink
Merge pull request #2 from oxfordmmm/add-unit-tests-pt-2
Browse files Browse the repository at this point in the history
Add unit tests pt 2
  • Loading branch information
JeremyWesthead authored Aug 16, 2024
2 parents 525e10d + 3d063b2 commit a85f885
Show file tree
Hide file tree
Showing 14 changed files with 2,561 additions and 130 deletions.
9 changes: 7 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "grumpy"
version = "0.1.10"
version = "0.1.11"
edition = "2021"
description = "Genetic analysis in Rust."
license-file = "LICENSE"
Expand All @@ -14,11 +14,16 @@ ordered-float = "4.2.1"
string_cache = "0.8.7"
vcf = "0.6.1"
pyo3 = { version = "0.22.1", features = ["extension-module"] }
pretty_assertions = "1.4.0"

[profile.dev]
opt-level = 3
debug = 1

[lib]
name = "grumpy"
crate-type = ["cdylib", "rlib"]
crate-type = ["cdylib", "rlib"]

[lints.rust]
# Used for ignoring boilerplate parts of py03 bindings + main function
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tarpaulin_include)'] }
1 change: 1 addition & 0 deletions src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ pub struct Evidence {
pub vcf_idx: Option<i64>,
}

#[cfg(not(tarpaulin_include))]
#[pymethods]
impl Evidence {
#[getter]
Expand Down
128 changes: 89 additions & 39 deletions src/difference.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
//! Module for handling differences between genomes and genes
use std::collections::{HashMap, HashSet};

use pyo3::prelude::*;

use ordered_float::{Float, OrderedFloat};
Expand Down Expand Up @@ -162,33 +164,64 @@ impl GenomeDifference {
let mut garc = "".to_string();
let mut indel_bases = None;
let mut indel_length = 0;
let mut gene_name = None;
let mut gene_position = None;
let mut codon_idx = None;

if alt.alt_type == AltType::REF {
// Skip ref calls
continue;
}

// Only annotate with gene information if there is a single gene at this position
if alt_pos.genes.len() == 1 {
gene_name = Some(alt_pos.genes[0].clone());
let gene = alt_genome.get_gene(alt_pos.genes[0].clone());
let (_gene_position, _codon_idx) =
gene.genome_idx_map.get(&alt_pos.genome_idx).unwrap();
codon_idx = *_codon_idx;
let mut gene_names = Vec::new();
let mut gene_positions = Vec::new();
let mut codon_idxs = Vec::new();
let mut genes_to_check = HashSet::new();
let mut gene_to_genome = HashMap::new();
if alt.alt_type == AltType::DEL {
// This is a deletion, so check for all genes within each base of this deletion
for (i, _) in alt.base.chars().enumerate() {
for g in alt_genome.genome_positions[idx + i].genes.clone() {
let gene_def = alt_genome.gene_name_to_def.get(&g).unwrap();

if !genes_to_check.contains(&g) {
genes_to_check.insert(g.clone());
// Get the genome index of the first base of the deletion
gene_to_genome.insert(g.clone(), (idx + i + 1) as i64);
}

if gene_def.reverse_complement {
// If this gene is reverse complemented, we want the last base rather than first
gene_to_genome.insert(g, (idx + i + 1) as i64);
}
}
}
}
for gene in alt_pos.genes.iter() {
genes_to_check.insert(gene.clone());
gene_to_genome.insert(gene.clone(), ref_pos.genome_idx);
}
let mut genes = genes_to_check.iter().cloned().collect::<Vec<String>>();
genes.sort();
for gene in genes.iter() {
let gene_name = Some(gene.clone());
let g = alt_genome.get_gene(gene.clone());
let mut __gene_position = None;
let (_gene_position, _codon_idx) = g
.genome_idx_map
.get(gene_to_genome.get(gene).unwrap())
.unwrap();
codon_idxs.push(*_codon_idx);
if alt.alt_type == AltType::SNP
|| alt.alt_type == AltType::HET
|| alt.alt_type == AltType::NULL
|| _gene_position < &0
{
// Use gene position for these as it should cover cases of being the codon index
gene_position = Some(*_gene_position);
__gene_position = Some(*_gene_position);
} else {
// Use nucleotide number for indels as they shouldn't be construed as codon indices
gene_position = GenomeDifference::get_nucleotide_number(&gene, alt);
__gene_position = GenomeDifference::get_nucleotide_number(&g, alt);
}
gene_names.push(gene_name.clone());
gene_positions.push(__gene_position);
}

if alt.alt_type == AltType::SNP
Expand Down Expand Up @@ -222,22 +255,35 @@ impl GenomeDifference {
+ &trim_float_string(format!("{:.3}", alt.evidence.frs.unwrap()));
}
}
let variant = Variant {
variant: garc,
nucleotide_index: ref_pos.genome_idx,
evidence: alt.evidence.vcf_row.clone(),
vcf_idx: alt.evidence.vcf_idx,
indel_length,
indel_nucleotides: indel_bases,
gene_position,
codon_idx,
gene_name,
};

if alt.evidence.is_minor {
minor_variants.push(variant);
} else {
variants.push(variant);
if gene_names.is_empty() {
gene_names.push(None);
gene_positions.push(None);
codon_idxs.push(None);
}

for ((gene_name, gene_position), codon_idx) in gene_names
.iter()
.zip(gene_positions.iter())
.zip(codon_idxs.iter())
{
let variant = Variant {
variant: garc.clone(),
nucleotide_index: ref_pos.genome_idx,
evidence: alt.evidence.vcf_row.clone(),
vcf_idx: alt.evidence.vcf_idx,
indel_length,
indel_nucleotides: indel_bases.clone(),
gene_position: *gene_position,
codon_idx: *codon_idx,
gene_name: gene_name.clone(),
};

if alt.evidence.is_minor {
minor_variants.push(variant);
} else {
variants.push(variant);
}
}
}
}
Expand Down Expand Up @@ -265,10 +311,12 @@ impl GenomeDifference {
GenePos::Codon(c) => {
for codon in c.codon.iter() {
for alt in codon.alts.iter() {
// Match on evidence and base length
// Match on evidence's VCF row and index
// This is to catch cases where a deletion starts outside of this gene
if genome_alt.base.len() == alt.base.len()
&& genome_alt.evidence == alt.evidence
if genome_alt.evidence.vcf_row == alt.evidence.vcf_row
&& genome_alt.evidence.vcf_idx == alt.evidence.vcf_idx
&& genome_alt.alt_type == alt.alt_type
&& genome_alt.evidence.is_minor == alt.evidence.is_minor
{
return Some(codon.nucleotide_number);
}
Expand All @@ -277,10 +325,12 @@ impl GenomeDifference {
}
GenePos::Nucleotide(n) => {
for alt in n.alts.iter() {
// Match on evidence and base length
// Match on evidence's VCF row and index
// This is to catch cases where a deletion starts outside of this gene
if genome_alt.base.len() == alt.base.len()
&& genome_alt.evidence == alt.evidence
if genome_alt.evidence.vcf_row == alt.evidence.vcf_row
&& genome_alt.evidence.vcf_idx == alt.evidence.vcf_idx
&& genome_alt.alt_type == alt.alt_type
&& genome_alt.evidence.is_minor == alt.evidence.is_minor
{
return Some(n.nucleotide_number);
}
Expand Down Expand Up @@ -432,11 +482,7 @@ impl GeneDifference {
for ev in alt_codon.codon.clone() {
if !ev.alts.is_empty() {
for e in ev.alts.iter() {
if !e.evidence.is_minor
&& (e.alt_type == AltType::SNP
|| e.alt_type == AltType::HET
|| e.alt_type == AltType::NULL)
{
if !e.evidence.is_minor && e.alt_type == AltType::SNP {
evidence.push(e.evidence.clone());
}
}
Expand Down Expand Up @@ -772,7 +818,7 @@ impl GeneDifference {
+ &ref_nc.nucleotide_number.to_string()
+ &alt.base;
ref_nucleotides = Some(ref_nc.reference.to_string());
alt_nucleotides = Some(alt_nc.reference.to_string());
alt_nucleotides = Some(alt.base.to_string());
}
if alt.alt_type == AltType::INS {
mutation =
Expand Down Expand Up @@ -1026,6 +1072,10 @@ impl GeneDifference {
fn trim_float_string(mut float_string: String) -> String {
while float_string.ends_with('0') {
float_string.pop();
if float_string.ends_with("1.0") {
// Keep trailing 0 for 1.0
return float_string;
}
}
// I hate implicit returns, but appease clippy
float_string
Expand Down
Loading

0 comments on commit a85f885

Please sign in to comment.