Skip to content

Commit

Permalink
Use a YamlDecoder builder to implement optional encoding_trap parameter.
Browse files Browse the repository at this point in the history
  • Loading branch information
mkmik committed Jul 30, 2020
1 parent b2d648e commit b9920c7
Showing 1 changed file with 54 additions and 18 deletions.
72 changes: 54 additions & 18 deletions src/yaml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,22 +210,42 @@ impl YamlLoader {
parser.load(&mut loader, true)?;
Ok(loader.docs)
}
}

pub fn load_from_bytes(mut source: impl std::io::Read) -> Result<Vec<Yaml>, LoadError> {
let mut buffer = Vec::new();
source.read_to_end(&mut buffer)?;

// Decodes the input buffer using either UTF-8, UTF-16LE or UTF-16BE depending on the BOM codepoint.
// If the buffer doesn't start with a BOM codepoint, it will use a fallback encoding obtained by
// detect_utf16_endianness.
let (res, _) = encoding::types::decode(
&buffer,
encoding::DecoderTrap::Strict,
detect_utf16_endianness(&buffer),
);
let s = res.map_err(LoadError::Decode)?;
YamlLoader::load_from_str(&s).map_err(LoadError::Scan)
/// Builder that reads bytes from a source, decodes them to text and parses the text as YAML.
///
/// Create one with [`YamlDecoder::read`], optionally replace the decoder trap with
/// [`YamlDecoder::encoding_trap`], then consume the builder with [`YamlDecoder::decode`].
///
/// The type itself carries no trait bound on `T`; the `std::io::Read` requirement is stated
/// on the `impl` block that actually reads from the source.
pub struct YamlDecoder<T> {
    /// Byte source that `decode` reads to the end.
    source: T,
    /// Trap value forwarded to `encoding::types::decode` when the bytes are decoded.
    trap: encoding::types::DecoderTrap,
}


impl<T: std::io::Read> YamlDecoder<T> {
    /// Starts a decoder over `source`, with the trap initialised to
    /// `encoding::DecoderTrap::Strict`.
    pub fn read(source: T) -> Self {
        Self {
            source,
            trap: encoding::DecoderTrap::Strict,
        }
    }

    /// Replaces the decoder trap that `decode` passes to the `encoding` crate and returns
    /// the builder so that calls can be chained.
    pub fn encoding_trap(self, trap: encoding::types::DecoderTrap) -> Self {
        Self { trap, ..self }
    }

    /// Consumes the builder: reads the whole source, decodes it to a string and loads the
    /// YAML documents found in it.
    ///
    /// # Errors
    ///
    /// * A failure while reading the source is converted into a `LoadError` by `?`.
    /// * A decoding failure is returned as `LoadError::Decode`.
    /// * A failure reported by `YamlLoader::load_from_str` is returned as `LoadError::Scan`.
    pub fn decode(self) -> Result<Vec<Yaml>, LoadError> {
        let Self { mut source, trap } = self;

        let mut bytes = Vec::new();
        source.read_to_end(&mut bytes)?;

        // The input is decoded as UTF-8, UTF-16LE or UTF-16BE depending on the BOM codepoint.
        // When the input does not start with a BOM codepoint, the encoding chosen by
        // detect_utf16_endianness is used as the fallback.
        let fallback = detect_utf16_endianness(&bytes);
        // Only the decoded text is needed; the second element of the returned pair is dropped.
        let (decoded, _) = encoding::types::decode(&bytes, trap, fallback);
        let text = decoded.map_err(LoadError::Decode)?;

        YamlLoader::load_from_str(&text).map_err(LoadError::Scan)
    }
}

/// The encoding crate knows how to tell apart UTF-8 from UTF-16LE and UTF-16BE, when the
Expand Down Expand Up @@ -793,7 +813,7 @@ a: 1
b: 2.2
c: [1, 2]
";
let out = YamlLoader::load_from_bytes(s as &[u8]).unwrap();
let out = YamlDecoder::read(s as &[u8]).decode().unwrap();
let doc = &out[0];
assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
assert_eq!(doc["b"].as_f64().unwrap(), 2.2f64);
Expand All @@ -808,7 +828,7 @@ c: [1, 2]
\x00b\x00:\x00 \x002\x00.\x002\x00
\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00
\x00";
let out = YamlLoader::load_from_bytes(s as &[u8]).unwrap();
let out = YamlDecoder::read(s as &[u8]).decode().unwrap();
let doc = &out[0];
println!("GOT: {:?}", doc);
assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
Expand All @@ -824,7 +844,7 @@ c: [1, 2]
\x00b\x00:\x00 \x002\x00.\x002\x00
\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00
";
let out = YamlLoader::load_from_bytes(s as &[u8]).unwrap();
let out = YamlDecoder::read(s as &[u8]).decode().unwrap();
let doc = &out[0];
println!("GOT: {:?}", doc);
assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
Expand All @@ -840,7 +860,23 @@ c: [1, 2]
\x00b\x00:\x00 \x002\x00.\x002\x00
\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00
\x00";
let out = YamlLoader::load_from_bytes(s as &[u8]).unwrap();
let out = YamlDecoder::read(s as &[u8]).decode().unwrap();
let doc = &out[0];
println!("GOT: {:?}", doc);
assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
assert_eq!(doc["b"].as_f64().unwrap(), 2.2f64);
assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
assert!(doc["d"][0].is_badvalue());
}

#[test]
fn test_read_trap() {
let s = b"---
a\xa9: 1
b: 2.2
c: [1, 2]
";
let out = YamlDecoder::read(s as &[u8]).encoding_trap(encoding::DecoderTrap::Ignore).decode().unwrap();
let doc = &out[0];
println!("GOT: {:?}", doc);
assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
Expand Down

0 comments on commit b9920c7

Please sign in to comment.