Skip to content

Commit

Permalink
test: added genome/gene tests
Browse files Browse the repository at this point in the history
  • Loading branch information
JeremyWesthead committed Aug 1, 2024
1 parent 231d774 commit 145f4a5
Show file tree
Hide file tree
Showing 5 changed files with 291 additions and 11 deletions.
75 changes: 75 additions & 0 deletions reference/TEST-DNA.gbk
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
LOCUS TEST_DNA 99 bp DNA linear VRL 21-JUL-2021
DEFINITION TEST_DNA, complete genome.
ACCESSION TEST_DNA
VERSION TEST_DNA.1
DBLINK BioProject: DNAID
KEYWORDS DNA.
SOURCE TEST_DNA_SOURCE
ORGANISM TEST_DNA_ORGANISM
REFERENCE 1 (bases 1 to 99)
AUTHORS Test,1., Test,2., Test,3.
TITLE Test title for a reference for an DNA strand
JOURNAL Test journal for an DNA strand
PUBMED 1
COMMENT This is some comment about TEST_DNA genome.
FEATURES Location/Qualifiers
source 1..99
/organism="TEST_DNA"
/mol_type="genomic DNA"
/isolate="DNA_ISO"
/db_xref="taxon:1"
gene 4..30
/gene="A"
/locus_tag="TEST_DNA_A"
/db_xref="GeneID:1"
mDNA 4..30
/gene="A"
/locus_tag="TEST_DNA_A"
/product="nucleoprotein"
/db_xref="GeneID:1"
CDS 4..30
/gene="A"
/locus_tag="TEST_DNA_A"
/codon_start=1
/product="nucleoprotein"
/protein_id="PROTEIN_A"
/db_xref="GeneID:1"
/translation="KKTPPPGGG"
gene 28..60
/gene="B"
/locus_tag="TEST_DNA_B"
/db_xref="GeneID:2"
mDNA 28..60
/gene="B"
/locus_tag="TEST_DNA_B"
/product="nucleoprotein"
/db_xref="GeneID:2"
CDS 28..60
/gene="B"
/locus_tag="TEST_DNA_B"
/codon_start=1
/product="nucleoprotein"
/protein_id="PROTEIN_B"
/db_xref="GeneID:2"
/translation="GGFFF!KKNPPP"
gene complement(91..96)
/gene="C"
/locus_tag="TEST_DNA_C"
/db_xref="GeneID:3"
mDNA complement(91..96)
/gene="C"
/locus_tag="TEST_DNA_C"
/product="nucleoprotein"
/db_xref="GeneID:3"
CDS complement(91..96)
/gene="C"
/locus_tag="TEST_DNA_C"
/codon_start=1
/product="nucleoprotein"
/protein_id="PROTEIN_C"
/db_xref="GeneID:3"
/translation="PP"
ORIGIN
1 aaaaaaaaaa cccccccccc gggggggggg tttttttttt aaaaaaaaaa cccccccccc
61 gggggggggg tttttttttt aaaaaaaaaa ccccccccc
//
2 changes: 1 addition & 1 deletion src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ impl Evidence {
}

#[pyclass]
#[derive(Clone)]
#[derive(Clone, Debug)]
/// Struct to hold the information to construct a gene
pub struct GeneDef {
#[pyo3(get, set)]
Expand Down
73 changes: 72 additions & 1 deletion src/gene.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
use pyo3::prelude::*;
use std::collections::HashMap;
use std::string::String;
use std::usize;
use std::vec::Vec;

use crate::common::{Alt, AltType, Evidence, GeneDef};
Expand Down Expand Up @@ -290,7 +291,11 @@ impl Gene {
// Figure out the nucleotide number for each position
// Promoter first
if gene_def.promoter_start != -1 {
for (nc_idx, i) in ((-(gene_def.promoter_size + 1))..0).enumerate() {
let mut promoter = -(gene_def.promoter_size + 1);
if gene_def.reverse_complement || gene_def.promoter_start == 0 {
promoter = -(gene_def.promoter_size);
}
for (nc_idx, i) in ((promoter)..0).enumerate() {
nucleotide_number.push(i);
gene_number.push(i);
gene_positions.push(GenePosition {
Expand Down Expand Up @@ -550,6 +555,72 @@ impl Gene {
}
}
}

/// Returns the leading part of `arr` that lies before the first position numbered `1`.
///
/// `arr` must have the same length as either `self.nucleotide_number` or
/// `self.gene_number`; the matching numbering is used to locate the split point.
/// When `arr` matches the length of both, `nucleotide_number` takes precedence.
///
/// # Arguments
/// * `arr` - Slice to cut; it is indexed in parallel with the chosen numbering.
///
/// # Returns
/// The sub-slice `arr[..idx]`, where `idx` is the index of the first entry equal
/// to `1` in the chosen numbering. This is empty when that entry is at index 0.
///
/// # Panics
/// * If `arr.len()` matches neither `nucleotide_number` nor `gene_number`.
/// * If the chosen numbering contains no entry equal to `1`.
pub fn at_promoter<'a, T>(&self, arr: &'a [T]) -> &'a [T] {
    // Pick the numbering that `arr` runs parallel to, judged purely by length.
    let numbering = if arr.len() == self.nucleotide_number.len() {
        &self.nucleotide_number
    } else if arr.len() == self.gene_number.len() {
        &self.gene_number
    } else {
        panic!("Invalid array length for promoter check!")
    };

    // The first entry numbered 1 marks where the promoter stops.
    let promoter_end_idx = numbering
        .iter()
        .position(|num| *num == 1)
        .unwrap_or_else(|| panic!("Promoter end not found in gene {}", self.name));

    &arr[..promoter_end_idx]
}

/// Returns the trailing part of `arr` starting at the first position numbered `1`.
///
/// `arr` must have the same length as either `self.nucleotide_number` or
/// `self.gene_number`; the matching numbering is used to locate the split point.
/// When `arr` matches the length of both, `nucleotide_number` takes precedence.
///
/// # Arguments
/// * `arr` - Slice to cut; it is indexed in parallel with the chosen numbering.
///
/// # Returns
/// The sub-slice `arr[idx..]`, where `idx` is the index of the first entry equal
/// to `1` in the chosen numbering. This is all of `arr` when that entry is at index 0.
///
/// # Panics
/// * If `arr.len()` matches neither `nucleotide_number` nor `gene_number`.
/// * If the chosen numbering contains no entry equal to `1`.
pub fn not_promoter<'a, T>(&self, arr: &'a [T]) -> &'a [T] {
    // Pick the numbering that `arr` runs parallel to, judged purely by length.
    let numbering = if arr.len() == self.nucleotide_number.len() {
        &self.nucleotide_number
    } else if arr.len() == self.gene_number.len() {
        &self.gene_number
    } else {
        panic!("Invalid array length for promoter check!")
    };

    // Everything from the first entry numbered 1 onwards is outside the promoter.
    let promoter_end_idx = numbering
        .iter()
        .position(|num| *num == 1)
        .unwrap_or_else(|| panic!("Promoter end not found in gene {}", self.name));

    &arr[promoter_end_idx..]
}
}

/// Converts a codon to an amino acid
Expand Down
Loading

0 comments on commit 145f4a5

Please sign in to comment.