Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support protein sequences #36

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
lint
nh13 committed Dec 11, 2024
commit 9612b6414f8d27528c3a6d4cd8d99262f160d6fd
21 changes: 14 additions & 7 deletions src/lib/matcher.rs
Original file line number Diff line number Diff line change
@@ -4,8 +4,8 @@ use std::ops::Range;
use crate::color::{color_background, color_head};
use crate::color::{COLOR_BACKGROUND, COLOR_BASES, COLOR_QUALS};
use crate::reverse_complement;
use crate::DNA_BASES;
use crate::AMINO_ACIDS;
use crate::DNA_BASES;
use anyhow::{bail, Context, Result};
use bstr::ByteSlice;
use regex::bytes::{Regex, RegexBuilder, RegexSet, RegexSetBuilder};
@@ -622,20 +622,27 @@ pub mod tests {
#[rstest]
#[case("AGTGTGATG", false)]
#[case("QRNQRNQRN", true)]
fn test_validate_fixed_pattern_is_ok(
#[case] pattern: &str,
#[case] protein: bool) {
fn test_validate_fixed_pattern_is_ok(#[case] pattern: &str, #[case] protein: bool) {
let result = validate_fixed_pattern(&pattern, protein);
assert!(result.is_ok())
}

#[rstest]
#[case("AXGTGTGATG", false, "Fixed pattern must contain only DNA bases: A .. [X] .. GTGTGATG")]
#[case("QRNQRNZQRN", true, "Fixed pattern must contain only amino acids: QRNQRN .. [Z] .. QRN")]
#[case(
"AXGTGTGATG",
false,
"Fixed pattern must contain only DNA bases: A .. [X] .. GTGTGATG"
)]
#[case(
"QRNQRNZQRN",
true,
"Fixed pattern must contain only amino acids: QRNQRN .. [Z] .. QRN"
)]
fn test_validate_fixed_pattern_error(
#[case] pattern: &str,
#[case] protein: bool,
#[case] msg: &str) {
#[case] msg: &str,
) {
let result = validate_fixed_pattern(&pattern, protein);
let inner = result.unwrap_err().to_string();
assert_eq!(inner, msg);
15 changes: 6 additions & 9 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -805,7 +805,7 @@ pub mod tests {
let sequence_match = expected_seq == return_sequences;
assert_eq!(sequence_match, expected_bool);
}

// ############################################################################################
// Tests matching against protein sequences (not DNA!)
// ############################################################################################
@@ -819,14 +819,11 @@ pub mod tests {
#[case(vec!["^Q", "^F"], vec!["QFPQFP"])] // unpaired: regex set with two matches
#[case(vec!["^Q", "^A"], vec!["AAAA", "ATAT", "AAAT", "QFPQFP"])] // unpaired: regex set with two matches
#[case(vec!["^M", "^K"], vec![])] // unpaired: regex set with no matches
fn test_protein_ok(
#[case] pattern: Vec<&str>,
#[case] expected_seq: Vec<&str>,
) {
fn test_protein_ok(#[case] pattern: Vec<&str>, #[case] expected_seq: Vec<&str>) {
let dir = TempDir::new().unwrap();
let seqs = vec![
vec!["AAAA", "TTTT", "ATAT", "TATA", "AAAT", "TTTA", "QFPQFP"],
];
let seqs = vec![vec![
"AAAA", "TTTT", "ATAT", "TATA", "AAAT", "TTTA", "QFPQFP",
]];
let out_path = dir.path().join(String::from("output.fq"));
let result_path = &out_path.clone();
let pattern = pattern.iter().map(|&s| s.to_owned()).collect::<Vec<_>>();
@@ -880,7 +877,7 @@ pub mod tests {
let result = fqgrep_from_opts(&opts);
assert_eq!(result.unwrap(), expected);
}

//
// ############################################################################################
// Tests that an error is returned when protein and reverse_complement are both present