Enable header parsing to use R: std::io::BufRead #377

Merged (3 commits) on Mar 25, 2023
Changes from all commits
4 changes: 4 additions & 0 deletions age/CHANGELOG.md
@@ -9,6 +9,10 @@ and this project adheres to Rust's notion of
 to 1.0.0 are beta releases.
 
 ## [Unreleased]
+### Added
+- `age::Decryptor::new_buffered`, which is more efficient for types implementing
+  `std::io::BufRead` (which includes `&[u8]` slices).
+- `impl std::io::BufRead for age::armor::ArmoredReader`
 
 ## [0.9.1] - 2023-03-24
 ### Added
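The changelog's aside that `std::io::BufRead` "includes `&[u8]` slices" is the ergonomic point of the new constructor: ciphertext already held in memory can be passed straight in. As a quick editorial illustration (not part of the PR), the bound is satisfied by the usual in-memory and wrapped readers:

use std::io::{BufRead, BufReader, Cursor};

// Illustrative only: a compile-time check that common types satisfy the
// `BufRead` bound that `Decryptor::new_buffered` requires.
fn requires_bufread<R: BufRead>(_input: R) {}

fn main() {
    // `&[u8]` slices implement `BufRead` directly.
    requires_bufread(&b"age-encryption.org/v1"[..]);
    // So do `Cursor<Vec<u8>>` and `BufReader<T>`; a bare `File` or socket
    // would need to be wrapped in `BufReader` first.
    requires_bufread(Cursor::new(vec![0u8; 16]));
    requires_bufread(BufReader::new(&b""[..]));
}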
2 changes: 1 addition & 1 deletion age/benches/parser.rs
@@ -30,7 +30,7 @@ fn bench(c: &mut Criterion) {
         output.write_all(&[]).unwrap();
         output.finish().unwrap();
 
-        b.iter(|| Decryptor::new(&encrypted[..]))
+        b.iter(|| Decryptor::new_buffered(&encrypted[..]))
     });
 }

2 changes: 1 addition & 1 deletion age/benches/throughput.rs
@@ -70,7 +70,7 @@ fn bench(c: &mut Criterion_) {
         output.finish().unwrap();
 
         b.iter(|| {
-            let decryptor = match Decryptor::new(&ct_buf[..]).unwrap() {
+            let decryptor = match Decryptor::new_buffered(&ct_buf[..]).unwrap() {
                 Decryptor::Recipients(decryptor) => decryptor,
                 _ => panic!(),
             };
30 changes: 29 additions & 1 deletion age/src/format.rs
@@ -1,7 +1,7 @@
 //! The age file format.
 
 use age_core::format::Stanza;
-use std::io::{self, Read, Write};
+use std::io::{self, BufRead, Read, Write};
 
 use crate::{
     error::DecryptError,
@@ -88,6 +88,34 @@ impl Header {
         }
     }
 
+    pub(crate) fn read_buffered<R: BufRead>(mut input: R) -> Result<Self, DecryptError> {
+        let mut data = vec![];
+        loop {
+            match read::header(&data) {
+                Ok((_, mut header)) => {
+                    if let Header::V1(h) = &mut header {
+                        h.encoded_bytes = Some(data);
+                    }
+                    break Ok(header);
+                }
+                Err(nom::Err::Incomplete(nom::Needed::Size(_))) => {
+                    // As we have a buffered reader, we can leverage the fact that the
+                    // currently-defined header formats are newline-separated, to more
+                    // efficiently read data for the parser to consume.
+                    if input.read_until(b'\n', &mut data)? == 0 {
+                        break Err(DecryptError::Io(io::Error::new(
+                            io::ErrorKind::UnexpectedEof,
+                            "Incomplete header",
+                        )));
+                    }
+                }
+                Err(_) => {
+                    break Err(DecryptError::InvalidHeader);
+                }
+            }
+        }
+    }
+
     #[cfg(feature = "async")]
     #[cfg_attr(docsrs, doc(cfg(feature = "async")))]
     pub(crate) async fn read_async<R: AsyncRead + Unpin>(
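The comment inside `read_buffered` is the heart of the change: every line of the currently-defined header formats ends in a newline, so the reader can hand the parser whole lines via `read_until` instead of growing the buffer a few bytes at a time. A rough standalone sketch of that loop follows; the blank-line termination check is a hypothetical stand-in for the real nom parser, so this illustrates the pattern rather than reproducing the PR's code.

use std::io::{self, BufRead};

// Sketch of the `read_buffered` strategy: append one newline-terminated chunk
// at a time, then re-check whether the accumulated bytes form a complete header.
fn read_header_like<R: BufRead>(mut input: R) -> io::Result<Vec<u8>> {
    let mut data = Vec::new();
    loop {
        // Hypothetical completion check; the real code re-runs the nom parser
        // and looks for `nom::Err::Incomplete`. Here a blank line ends the header.
        if data.ends_with(b"\n\n") {
            return Ok(data);
        }
        if input.read_until(b'\n', &mut data)? == 0 {
            return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "incomplete header"));
        }
    }
}

fn main() -> io::Result<()> {
    let header = read_header_like(&b"field: 1\nfield: 2\n\nbody"[..])?;
    assert_eq!(&header[..], b"field: 1\nfield: 2\n\n");
    Ok(())
}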
183 changes: 95 additions & 88 deletions age/src/primitives/armor.rs
@@ -726,6 +726,7 @@ impl<R> ArmoredReader<R> {
     ///
     /// Returns the number of bytes read into the buffer, or None if there was no cached
     /// data.
+    #[cfg(feature = "async")]
     fn read_cached_data(&mut self, buf: &mut [u8]) -> Option<usize> {
         if self.byte_start >= self.byte_end {
             None
@@ -820,69 +821,106 @@ impl<R> ArmoredReader<R> {
     }
 }
 
-impl<R: BufRead> Read for ArmoredReader<R> {
-    fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
+impl<R: BufRead> BufRead for ArmoredReader<R> {
+    fn fill_buf(&mut self) -> io::Result<&[u8]> {
         loop {
             match self.is_armored {
                 None => {
                     self.inner.read_exact(&mut self.byte_buf[..MIN_ARMOR_LEN])?;
                     self.detect_armor()?
                 }
                 Some(false) => {
-                    // Return any leftover data from armor detection
-                    return if let Some(read) = self.read_cached_data(buf) {
-                        Ok(read)
-                    } else {
-                        self.inner.read(buf).map(|read| {
-                            self.data_read += read;
-                            self.count_reader_bytes(read)
+                    break if self.byte_start >= self.byte_end {
+                        self.inner.read(&mut self.byte_buf[..]).map(|read| {
+                            self.byte_start = 0;
+                            self.byte_end = read;
+                            self.count_reader_bytes(read);
+                            &self.byte_buf[..read]
                         })
-                    };
+                    } else {
+                        Ok(&self.byte_buf[self.byte_start..self.byte_end])
+                    }
                 }
+                Some(true) => {
+                    break if self.found_end {
+                        Ok(&[])
+                    } else if self.byte_start >= self.byte_end {
+                        if self.read_next_armor_line()? {
+                            Ok(&[])
+                        } else {
+                            Ok(&self.byte_buf[self.byte_start..self.byte_end])
+                        }
+                    } else {
+                        Ok(&self.byte_buf[self.byte_start..self.byte_end])
+                    }
+                }
-                Some(true) => break,
             }
         }
-        if self.found_end {
-            return Ok(0);
-        }
+    }
 
-        let buf_len = buf.len();
+    fn consume(&mut self, amt: usize) {
+        self.byte_start += amt;
+        self.data_read += amt;
+        assert!(self.byte_start <= self.byte_end);
+    }
+}
 
-        // Output any remaining bytes from the previous line
-        if let Some(read) = self.read_cached_data(buf) {
-            buf = &mut buf[read..];
-        }
+impl<R: BufRead> ArmoredReader<R> {
+    /// Fills `self.byte_buf` with the next line of armored data.
+    ///
+    /// Returns `true` if this was the last line.
+    fn read_next_armor_line(&mut self) -> io::Result<bool> {
+        assert_eq!(self.is_armored, Some(true));
 
-        while !buf.is_empty() {
-            // Read the next line
-            self.inner
-                .read_line(&mut self.line_buf)
-                .map(|read| self.count_reader_bytes(read))?;
-
-            // Parse the line into bytes
-            if self.parse_armor_line()? {
-                // This was the last line! Check for trailing garbage.
-                loop {
-                    let amt = match self.inner.fill_buf()? {
-                        &[] => break,
-                        buf => {
-                            if buf.iter().any(|b| !b.is_ascii_whitespace()) {
-                                return Err(io::Error::new(
-                                    io::ErrorKind::InvalidData,
-                                    ArmoredReadError::TrailingGarbage,
-                                ));
-                            }
-                            buf.len()
+        // Read the next line
+        self.inner
+            .read_line(&mut self.line_buf)
+            .map(|read| self.count_reader_bytes(read))?;
+
+        // Parse the line into bytes
+        if self.parse_armor_line()? {
+            // This was the last line! Check for trailing garbage.
+            loop {
+                let amt = match self.inner.fill_buf()? {
+                    &[] => break,
+                    buf => {
+                        if buf.iter().any(|b| !b.is_ascii_whitespace()) {
+                            return Err(io::Error::new(
+                                io::ErrorKind::InvalidData,
+                                ArmoredReadError::TrailingGarbage,
+                            ));
+                        }
-                        }
-                    };
-                    self.inner.consume(amt);
-                }
-                break;
+                        buf.len()
+                    }
+                };
+                self.inner.consume(amt);
+            }
+            Ok(true)
+        } else {
+            Ok(false)
+        }
+    }
+}
 
+impl<R: BufRead> Read for ArmoredReader<R> {
+    fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
+        let buf_len = buf.len();
+
+        while !buf.is_empty() {
+            match self.fill_buf()? {
+                [] => break,
+                next => {
+                    let read = cmp::min(next.len(), buf.len());
+
+                    if next.len() < buf.len() {
+                        buf[..read].copy_from_slice(next);
+                    } else {
+                        buf.copy_from_slice(&next[..read]);
+                    }
+
-            // Output as much as we can of this line
-            if let Some(read) = self.read_cached_data(buf) {
-                buf = &mut buf[read..];
+                    self.consume(read);
+                    buf = &mut buf[read..];
                 }
             }
         }
 
@@ -1061,49 +1099,9 @@ impl<R: BufRead + Seek> Seek for ArmoredReader<R> {
                     self.inner.read_exact(&mut self.byte_buf[..MIN_ARMOR_LEN])?;
                     self.detect_armor()?
                 }
-                Some(false) => {
-                    break if self.byte_start >= self.byte_end {
-                        // Map the data read onto the underlying stream.
-                        let start = self.start()?;
-                        let pos = match pos {
-                            SeekFrom::Start(offset) => SeekFrom::Start(start + offset),
-                            // Current and End positions don't need to be shifted.
-                            x => x,
-                        };
-                        self.inner.seek(pos)
-                    } else {
-                        // We are still inside the first line.
-                        match pos {
-                            SeekFrom::Start(offset) => self.byte_start = offset as usize,
-                            SeekFrom::Current(offset) => {
-                                let res = (self.byte_start as i64) + offset;
-                                if res >= 0 {
-                                    self.byte_start = res as usize;
-                                } else {
-                                    return Err(io::Error::new(
-                                        io::ErrorKind::InvalidData,
-                                        "cannot seek before the start",
-                                    ));
-                                }
-                            }
-                            SeekFrom::End(offset) => {
-                                let res = (self.line_buf.len() as i64) + offset;
-                                if res >= 0 {
-                                    self.byte_start = res as usize;
-                                } else {
-                                    return Err(io::Error::new(
-                                        io::ErrorKind::InvalidData,
-                                        "cannot seek before the start",
-                                    ));
-                                }
-                            }
-                        }
-                        Ok(self.byte_start as u64)
-                    };
-                }
-                Some(true) => {
+                Some(armored) => {
                     // Convert the offset into the target position within the data inside
-                    // the armor.
+                    // the (maybe) armor.
                     let start = self.start()?;
                     let target_pos = match pos {
                         SeekFrom::Start(offset) => offset,
@@ -1146,6 +1144,15 @@ impl<R: BufRead + Seek> Seek for ArmoredReader<R> {
                         }
                     };
 
+                    if !armored {
+                        // We can seek directly on the inner reader.
+                        self.inner.seek(SeekFrom::Start(start + target_pos))?;
+                        self.byte_start = 0;
+                        self.byte_end = 0;
+                        self.data_read = target_pos as usize;
+                        break Ok(self.data_read as u64);
+                    }
+
                     // Jump back to the start of the armor data, and then read and drop
                     // until we reach the target position. This is very inefficient, but
                     // as armored files can have arbitrary line endings within the file,
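For readers less familiar with the `BufRead` contract that `ArmoredReader` now implements: `fill_buf` exposes already-buffered bytes without copying, and `consume` records how many of them the caller actually used, which is exactly the pair the rewritten `Read::read` above is built on. A generic, age-independent sketch of a consumer driving that contract:

use std::io::{self, BufRead};

// Generic illustration of the `fill_buf`/`consume` loop; any `BufRead`,
// including the new `ArmoredReader` impl, can be driven this way.
fn count_newlines<R: BufRead>(mut reader: R) -> io::Result<usize> {
    let mut newlines = 0;
    loop {
        let chunk = reader.fill_buf()?;
        if chunk.is_empty() {
            // An empty slice from `fill_buf` means end of input.
            break Ok(newlines);
        }
        newlines += chunk.iter().filter(|&&b| b == b'\n').count();
        let used = chunk.len();
        reader.consume(used);
    }
}

fn main() -> io::Result<()> {
    assert_eq!(count_newlines(&b"two\nlines\n"[..])?, 2);
    Ok(())
}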
31 changes: 30 additions & 1 deletion age/src/protocol.rs
@@ -2,7 +2,7 @@
 
 use age_core::{format::grease_the_joint, secrecy::SecretString};
 use rand::{rngs::OsRng, RngCore};
-use std::io::{self, Read, Write};
+use std::io::{self, BufRead, Read, Write};
 
 use crate::{
     error::{DecryptError, EncryptError},
@@ -181,6 +181,13 @@ impl<R: Read> Decryptor<R> {
     /// Attempts to create a decryptor for an age file.
     ///
     /// Returns an error if the input does not contain a valid age file.
+    ///
+    /// # Performance
+    ///
+    /// This constructor will work with any type implementing [`io::Read`], and uses a
+    /// slower parser and internal buffering to ensure no overreading occurs. Consider
+    /// using [`Decryptor::new_buffered`] for types implementing `std::io::BufRead`, which
+    /// includes `&[u8]` slices.
     pub fn new(mut input: R) -> Result<Self, DecryptError> {
         let header = Header::read(&mut input)?;
 
@@ -194,6 +201,28 @@
     }
 }
 
+impl<R: BufRead> Decryptor<R> {
+    /// Attempts to create a decryptor for an age file.
+    ///
+    /// Returns an error if the input does not contain a valid age file.
+    ///
+    /// # Performance
+    ///
+    /// This constructor is more performant than [`Decryptor::new`] for types implementing
+    /// [`io::BufRead`], which includes `&[u8]` slices.
+    pub fn new_buffered(mut input: R) -> Result<Self, DecryptError> {
+        let header = Header::read_buffered(&mut input)?;
+
+        match header {
+            Header::V1(v1_header) => {
+                let nonce = Nonce::read(&mut input)?;
+                Decryptor::from_v1_header(input, v1_header, nonce)
+            }
+            Header::Unknown(_) => Err(DecryptError::UnknownFormat),
+        }
+    }
+}
+
 #[cfg(feature = "async")]
 #[cfg_attr(docsrs, doc(cfg(feature = "async")))]
 impl<R: AsyncRead + Unpin> Decryptor<R> {
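To see the new constructor in context, here is a round-trip sketch adapted from the crate's documented passphrase example. Everything except the `new_buffered` call is the pre-existing 0.9 API, and the variant matching is unchanged from `Decryptor::new`.

use std::io::{Read, Write};
use age::secrecy::Secret;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let plaintext = b"Hello world!";
    let passphrase = "this is not a good passphrase";

    // Encrypt exactly as before.
    let encryptor = age::Encryptor::with_user_passphrase(Secret::new(passphrase.to_owned()));
    let mut encrypted = vec![];
    let mut writer = encryptor.wrap_output(&mut encrypted)?;
    writer.write_all(plaintext)?;
    writer.finish()?;

    // The in-memory ciphertext is a `&[u8]` slice, i.e. already `BufRead`,
    // so it can go straight into the new buffered constructor.
    let decryptor = match age::Decryptor::new_buffered(&encrypted[..])? {
        age::Decryptor::Passphrase(d) => d,
        _ => unreachable!(),
    };

    let mut decrypted = vec![];
    let mut reader = decryptor.decrypt(&Secret::new(passphrase.to_owned()), None)?;
    reader.read_to_end(&mut decrypted)?;

    assert_eq!(&decrypted[..], plaintext);
    Ok(())
}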