Skip to content

Commit

Permalink
more movement to rust-htslib
Browse files Browse the repository at this point in the history
  • Loading branch information
brentp committed Nov 15, 2024
1 parent ffddd2b commit 733751f
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 132 deletions.
10 changes: 5 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,28 @@ smartstring = { version = "1.0.1", optional = true }
smol_str = { version = "0.2.0", optional = true }
compact_str = { version = "0.7.0", optional = true }
kstring = { version = "2.0.0", optional = true }
noodles = { version = "0.83.0" }
noodles = { version = "0.85.0" }
flate2 = "1.0.26"
clap = { version = "4.2.7", features = ['derive'] }
env_logger = "0.10.0"
log = "0.4.19"
linear-map = "1.2.0"
hashbrown = "0.14.0"
xvcf = { version = "0.1.4", git = "https://github.com/brentp/xvcf-rs" }
xvcf = { version = "0.2.0", git = "https://github.com/brentp/xvcf-rs" }
mimalloc = "0.1.39"
bitflags = "2.4.0"
rust-htslib = {version = "0.47.0", features = ["libdeflate"]}
bio = "2.0.3"

[features]
default = ["bed", "vcf", "bcf", "csi", "core", "bam", "sam", "bgzf"]
default = ["bed", "csi", "core", "bam", "sam", "bgzf"]
bam = ["noodles/bam"]
bed = ["noodles/bed"]
bgzf = ["noodles/bgzf"]
#cram = ["noodles/cram"]
sam = ["noodles/sam"]
vcf = ["noodles/vcf"]
csi = ["noodles/csi"]
core = ["noodles/core"]
bcf = ["noodles/bcf"]
# allow a Box<dyn Positioned> in the enum to support user-specified types.
dyn_positioned = []

Expand Down
91 changes: 29 additions & 62 deletions src/bedder_bed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,61 +2,46 @@

use crate::position::{Field, FieldError, Position, Positioned, Value, Valued};
use crate::string::String;
pub use bed::record::Record;
pub use noodles::bed;
use noodles::core;
use bio::io::bed;
use std::io::{self, BufRead};
use std::result;

impl crate::position::Positioned for bed::record::Record<3> {
#[derive(Debug)]
pub struct BedRecord(bed::Record);

impl crate::position::Positioned for BedRecord {
#[inline]
fn chrom(&self) -> &str {
self.reference_sequence_name()
&self.0.chrom()
}

#[inline]
fn start(&self) -> u64 {
// noodles position is 1-based.
self.start_position().get() as u64 - 1
self.0.start()
}

fn set_start(&mut self, start: u64) {
// must build a new record to set start.
let pstart = core::Position::try_from(start as usize + 1).expect("invalid start");
let record = bed::Record::<3>::builder()
.set_reference_sequence_name(self.reference_sequence_name())
.set_start_position(pstart)
.set_end_position(self.end_position())
.set_optional_fields(self.optional_fields().clone())
.build()
.expect("error building record");
*self = record;
#[inline]
fn stop(&self) -> u64 {
self.0.end()
}

fn set_stop(&mut self, stop: u64) {
// must build a new record to set start.
let pstop = core::Position::try_from(stop as usize).expect("invalid stop");
let record = bed::Record::<3>::builder()
.set_reference_sequence_name(self.reference_sequence_name())
.set_start_position(self.start_position())
.set_end_position(pstop)
.set_optional_fields(self.optional_fields().clone())
.build()
.expect("error building record");
*self = record;
#[inline]
fn set_start(&mut self, start: u64) {
self.0.set_start(start);
}

#[inline]
fn stop(&self) -> u64 {
self.end_position().get() as u64
fn set_stop(&mut self, stop: u64) {
self.0.set_end(stop);
}

#[inline]
fn dup(&self) -> Box<dyn Positioned> {
Box::new(self.clone())
Box::new(BedRecord(self.0.clone()))
}
}

impl Valued for bed::record::Record<3> {
impl Valued for BedRecord {
fn value(&self, v: crate::position::Field) -> result::Result<Value, FieldError> {
match v {
Field::String(s) => Ok(Value::Strings(vec![s])),
Expand All @@ -81,7 +66,6 @@ where
R: BufRead,
{
reader: bed::Reader<R>,
buf: std::string::String,
last_record: Option<Last>,
line_number: u64,
}
Expand All @@ -93,7 +77,6 @@ where
pub fn new(r: R) -> BedderBed<R> {
BedderBed {
reader: bed::Reader::new(r),
buf: std::string::String::new(),
last_record: None,
line_number: 0,
}
Expand All @@ -108,46 +91,30 @@ where
&mut self,
_q: Option<&crate::position::Position>,
) -> Option<std::result::Result<Position, std::io::Error>> {
self.buf.clear();
loop {
self.line_number += 1;
return match self.reader.read_line(&mut self.buf) {
Ok(0) => None,
Ok(_) => {
if self.buf.starts_with('#') || self.buf.is_empty() {
continue;
}
let record: bed::record::Record<3> = match self.buf.parse() {
Err(e) => {
let msg = format!(
"line#{:?}:{:?} error: {:?}",
self.line_number, &self.buf, e
);
return Some(Err(io::Error::new(io::ErrorKind::InvalidData, msg)));
}
Ok(r) => r,
};

return match self.reader.records() {
None => None,
Some(Ok(record)) => {
match &mut self.last_record {
None => {
self.last_record = Some(Last {
chrom: String::from(record.chrom()),
start: record.start(),
stop: record.stop(),
chrom: String::from(&record.chrom),
start: record.start as u64,
stop: record.end as u64,
})
}
Some(r) => {
if r.chrom != record.chrom() {
r.chrom = String::from(record.chrom())
if r.chrom != record.chrom {
r.chrom = String::from(&record.chrom)
}
r.start = record.start();
r.stop = record.stop();
r.start = record.start as u64;
r.stop = record.end as u64;
}
}

Some(Ok(Position::Bed(record)))
}
Err(e) => Some(Err(e)),
Some(Err(e)) => Some(Err(io::Error::new(io::ErrorKind::InvalidData, e))),
};
}
}
Expand Down
119 changes: 60 additions & 59 deletions src/bedder_vcf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,22 @@
use crate::position::{Field, FieldError, Position, Positioned, Value};
use crate::string::String;
use noodles::core::Region;
use noodles::vcf::{self, record::Chromosome};

pub use rust_htslib::bcf;
use std::io::{self, Read, Seek};
use std::iter::Iterator;
use std::result;
use vcf::record::info::field;
use vcf::record::QualityScore;
pub use vcf::Record;
pub use xvcf;
use xvcf::Skip;

pub struct BedderVCF<R> {
reader: xvcf::Reader<R>,
pub struct BedderVCF<'a> {
reader: xvcf::Reader<'a>,
record_number: u64,
header: vcf::Header,
}

impl<R> BedderVCF<R>
where
R: Read + 'static,
{
pub fn new(r: xvcf::Reader<R>) -> io::Result<BedderVCF<R>> {
impl<'a> BedderVCF<'a> {
pub fn new(r: xvcf::Reader<'a>) -> io::Result<BedderVCF<'a>> {
let h = r.header().clone();
let v = BedderVCF {
reader: r,
Expand All @@ -32,78 +28,83 @@ where
}
}

pub fn match_info_value(info: &vcf::record::Info, name: &str) -> result::Result<Value, FieldError> {
//let info = record.info();
let key: vcf::record::info::field::Key = name
.parse()
.map_err(|_| FieldError::InvalidFieldName(String::from(name)))?;

match info.get(&key) {
pub fn match_info_value(
info: &rust_htslib::bcf::Record,
name: &str,
) -> result::Result<Value, FieldError> {
// Try to get the info field by name
match info
.info(name.as_bytes())
.map_err(|e| FieldError::InvalidFieldValue(e.to_string()))?
{
Some(value) => match value {
Some(field::Value::Integer(i)) => Ok(Value::Ints(vec![*i as i64])),
Some(field::Value::Float(f)) => Ok(Value::Floats(vec![*f as f64])),
Some(field::Value::String(s)) => Ok(Value::Strings(vec![String::from(s)])),
Some(field::Value::Character(c)) => {
Ok(Value::Strings(vec![String::from(c.to_string())]))
bcf::record::Info::Integer(arr) => {
Ok(Value::Ints(arr.into_iter().map(|v| v as i64).collect()))
}
//Some(field::Value::Flag) => Ok(Value::Strings(vec![String::from("true")])),
Some(field::Value::Array(arr)) => {
match arr {
field::value::Array::Integer(arr) => Ok(Value::Ints(
arr.iter().flatten().map(|&v| v as i64).collect(),
)),
field::value::Array::Float(arr) => Ok(Value::Floats(
arr.iter().flatten().map(|&v| v as f64).collect(),
)),
field::value::Array::String(arr) => Ok(Value::Strings(
arr.iter().flatten().map(String::from).collect(),
)),
field::value::Array::Character(arr) => Ok(Value::Strings(
arr.iter().flatten().map(|v| v.to_string().into()).collect(),
)),
//field::Value::Flag => Ok(Value::Strings(vec![String::from("true")])),
}
bcf::record::Info::Float(arr) => {
Ok(Value::Floats(arr.into_iter().map(|v| v as f64).collect()))
}

_ => Err(FieldError::InvalidFieldName(String::from(name))),
bcf::record::Info::String(arr) => Ok(Value::Strings(
arr.into_iter()
.map(|s| String::from_utf8_lossy(s).into_owned().into())
.collect(),
)),
bcf::record::Info::Flag(true) => Ok(Value::Strings(vec![String::from("true")])),
bcf::record::Info::Flag(false) => Ok(Value::Strings(vec![String::from("false")])),
},
None => Err(FieldError::InvalidFieldName(String::from(name))),
}
}

pub fn match_value(record: &vcf::record::Record, f: Field) -> result::Result<Value, FieldError> {
pub fn match_value(
record: &rust_htslib::bcf::Record,
f: Field,
) -> result::Result<Value, FieldError> {
match f {
Field::String(s) => match s.as_str() {
"chrom" => Ok(Value::Strings(vec![String::from(Positioned::chrom(
record,
))])),
"start" => Ok(Value::Ints(vec![Positioned::start(record) as i64])),
"stop" => Ok(Value::Ints(vec![Positioned::stop(record) as i64])),
"ID" => Ok(Value::Strings(
record.ids().iter().map(|s| s.to_string().into()).collect(),
)),
"FILTER" => Ok(Value::Strings(
record
"ID" => {
let ids = record
.id()
.map_err(|e| FieldError::InvalidFieldValue(e.to_string()))?;
Ok(Value::Strings(vec![String::from_utf8_lossy(ids)
.into_owned()
.into()]))
}
"FILTER" => {
let filters = record
.filters()
.iter()
.map(|s| String::from(s.to_string()))
.collect(),
)),
"QUAL" => Ok(Value::Floats(vec![f32::from(
record
.quality_score()
.unwrap_or(QualityScore::try_from(0f32).expect("error getting quality score")),
) as f64])),
.map_err(|e| FieldError::InvalidFieldValue(e.to_string()))?;
Ok(Value::Strings(
filters
.iter()
.map(|s| String::from_utf8_lossy(s).into_owned().into())
.collect(),
))
}
"QUAL" => {
let qual = record
.qual()
.map_err(|e| FieldError::InvalidFieldValue(e.to_string()))?;
Ok(Value::Floats(vec![if qual.is_nan() {
-1.0
} else {
qual as f64
}]))
}
_ => {
if s.len() > 5 && &s[0..5] == "INFO." {
match_info_value(record.info(), &s[5..])
match_info_value(record, &s[5..])
} else {
// TODO: format
unimplemented!();
}
}
},

Field::Int(i) => Err(FieldError::InvalidFieldIndex(i)),
}
}
Expand Down
Loading

0 comments on commit 733751f

Please sign in to comment.