-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchrom_ordering.rs
91 lines (87 loc) · 2.56 KB
/
chrom_ordering.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
use crate::string::String;
use hashbrown::HashMap;
use std::io::{self, BufRead, Read};
/// Per-chromosome record stored as the value of a genome map.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Chromosome {
    /// Ordinal of the chromosome; `parse_genome` sets it to the size of the
    /// genome map at the moment the chromosome is inserted.
    pub(crate) index: usize,
    /// Length taken from the second column of the genome file, or `None` when
    /// that column is absent or does not parse as `usize`.
    pub(crate) length: Option<usize>,
}
/// Parses a genome file into a map from chromosome name to [`Chromosome`].
///
/// Every line is split on whitespace: the first field is the chromosome name
/// and the optional second field is its length; any further fields are
/// ignored. Chromosomes are numbered from 0 in order of first appearance.
///
/// * Blank lines and lines whose first non-whitespace character is `#` are
///   skipped.
/// * A length that does not parse as `usize` is reported with `log::warn!`
///   and stored as `None`.
/// * A chromosome name that was already seen is reported with `log::warn!`
///   and ignored, so the first occurrence keeps its index and length and the
///   indices stay contiguous.
///
/// # Errors
///
/// Returns any error produced while reading lines from `reader` (`read_line`
/// also reports input that is not valid UTF-8 as an error).
pub fn parse_genome<R>(reader: R) -> io::Result<HashMap<String, Chromosome>>
where
    R: Read,
{
    let mut reader = io::BufReader::new(reader);
    let mut genome: HashMap<String, Chromosome> = HashMap::default();
    // A single buffer is reused for every line to avoid per-line allocations.
    let mut line = std::string::String::new();
    while reader.read_line(&mut line)? > 0 {
        let mut fields = line.split_whitespace();
        match fields.next() {
            // Blank or whitespace-only line: nothing to record.
            None => {}
            // Comment line; checking the first token also covers indented comments.
            Some(chrom) if chrom.starts_with('#') => {}
            // Re-inserting would replace the entry with `index == genome.len()`
            // without growing the map, leaving a hole in the index sequence.
            Some(chrom) if genome.contains_key(chrom) => {
                log::warn!(
                    "duplicate chromosome {} in genome file, keeping first occurrence",
                    chrom
                );
            }
            Some(chrom) => {
                let length = fields.next().and_then(|raw| match raw.parse::<usize>() {
                    Ok(len) => Some(len),
                    Err(_) => {
                        log::warn!(
                            "invalid length for chromosome {} with line: {}",
                            chrom,
                            // Drop the line terminator so the log entry stays on one line.
                            line.trim_end()
                        );
                        None
                    }
                });
                let index = genome.len();
                genome.insert(String::from(chrom), Chromosome { index, length });
            }
        }
        line.clear();
    }
    Ok(genome)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Chromosomes are indexed in file order, and a length is recorded only
    /// for lines that carry a second column.
    #[test]
    fn test_parse_genome() {
        let input = "chr1\nchr2\t43\nchr3\n";
        let parsed = parse_genome(input.as_bytes()).unwrap();

        // Expected (name, length) pairs, listed in the order they appear in `input`.
        let expected = [("chr1", None), ("chr2", Some(43)), ("chr3", None)];
        assert_eq!(parsed.len(), expected.len());

        for (index, &(name, length)) in expected.iter().enumerate() {
            assert_eq!(parsed.get(name), Some(&Chromosome { index, length }));
        }
    }
}