From ad5e4aab5822deb6bcf4b80515c8d09835aeea31 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 12 Feb 2024 14:50:32 -0600 Subject: [PATCH 1/4] refactor(stream): Forward to bytes where possible --- src/stream/mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/stream/mod.rs b/src/stream/mod.rs index b1ad69cd..d42fc8c5 100644 --- a/src/stream/mod.rs +++ b/src/stream/mod.rs @@ -2409,7 +2409,7 @@ impl<'i, 's> FindSlice<(&'s str, &'s str, &'s str)> for &'i [u8] { impl<'i, 's> FindSlice<&'s str> for &'i str { #[inline(always)] fn find_slice(&self, substr: &'s str) -> Option<crate::lib::std::ops::Range<usize>> { - self.as_bytes().find_slice(substr.as_bytes()) + self.as_bytes().find_slice(substr) } } @@ -2482,28 +2482,28 @@ impl<'i> FindSlice<(char, char, char)> for &'i str { impl<'i> FindSlice<u8> for &'i str { #[inline(always)] fn find_slice(&self, substr: u8) -> Option<crate::lib::std::ops::Range<usize>> { - self.find_slice(substr.as_char()) + self.as_bytes().find_slice(substr) } } impl<'i> FindSlice<(u8,)> for &'i str { #[inline(always)] fn find_slice(&self, substr: (u8,)) -> Option<crate::lib::std::ops::Range<usize>> { - self.find_slice((substr.0.as_char(),)) + self.as_bytes().find_slice(substr) } } impl<'i> FindSlice<(u8, u8)> for &'i str { #[inline(always)] fn find_slice(&self, substr: (u8, u8)) -> Option<crate::lib::std::ops::Range<usize>> { - self.find_slice((substr.0.as_char(), substr.1.as_char())) + self.as_bytes().find_slice(substr) } } impl<'i> FindSlice<(u8, u8, u8)> for &'i str { #[inline(always)] fn find_slice(&self, substr: (u8, u8, u8)) -> Option<crate::lib::std::ops::Range<usize>> { - self.find_slice((substr.0.as_char(), substr.1.as_char(), substr.2.as_char())) + self.as_bytes().find_slice(substr) } } From 0a7ef282d77e20dc52e1f65a1ba80eba0cd8bb5c Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 12 Feb 2024 14:52:14 -0600 Subject: [PATCH 2/4] feat(stream): Allow finding chars in byte slices --- src/stream/mod.rs | 64 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/src/stream/mod.rs b/src/stream/mod.rs index d42fc8c5..96b2be84 
100644 --- a/src/stream/mod.rs +++ b/src/stream/mod.rs @@ -2340,6 +2340,48 @@ impl<'i, 's> FindSlice<(&'s [u8], &'s [u8], &'s [u8])> for &'i [u8] { } } +impl<'i> FindSlice<char> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: char) -> Option<crate::lib::std::ops::Range<usize>> { + let mut b = [0; 4]; + let substr = substr.encode_utf8(&mut b); + self.find_slice(&*substr) + } +} + +impl<'i> FindSlice<(char,)> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: (char,)) -> Option<crate::lib::std::ops::Range<usize>> { + let mut b = [0; 4]; + let substr0 = substr.0.encode_utf8(&mut b); + self.find_slice((&*substr0,)) + } +} + +impl<'i> FindSlice<(char, char)> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: (char, char)) -> Option<crate::lib::std::ops::Range<usize>> { + let mut b = [0; 4]; + let substr0 = substr.0.encode_utf8(&mut b); + let mut b = [0; 4]; + let substr1 = substr.1.encode_utf8(&mut b); + self.find_slice((&*substr0, &*substr1)) + } +} + +impl<'i> FindSlice<(char, char, char)> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: (char, char, char)) -> Option<crate::lib::std::ops::Range<usize>> { + let mut b = [0; 4]; + let substr0 = substr.0.encode_utf8(&mut b); + let mut b = [0; 4]; + let substr1 = substr.1.encode_utf8(&mut b); + let mut b = [0; 4]; + let substr2 = substr.2.encode_utf8(&mut b); + self.find_slice((&*substr0, &*substr1, &*substr2)) + } +} + impl<'i> FindSlice<u8> for &'i [u8] { #[inline(always)] fn find_slice(&self, substr: u8) -> Option<crate::lib::std::ops::Range<usize>> { @@ -2440,42 +2482,28 @@ impl<'i, 's> FindSlice<(&'s str, &'s str, &'s str)> for &'i str { impl<'i> FindSlice<char> for &'i str { #[inline(always)] fn find_slice(&self, substr: char) -> Option<crate::lib::std::ops::Range<usize>> { - let mut b = [0; 4]; - let substr = substr.encode_utf8(&mut b); - self.find_slice(&*substr) + self.as_bytes().find_slice(substr) } } impl<'i> FindSlice<(char,)> for &'i str { #[inline(always)] fn find_slice(&self, substr: (char,)) -> Option<crate::lib::std::ops::Range<usize>> { - let mut b = [0; 4]; - let substr0 = substr.0.encode_utf8(&mut b); - self.find_slice((&*substr0,)) + self.as_bytes().find_slice(substr) } } impl<'i> 
FindSlice<(char, char)> for &'i str { #[inline(always)] fn find_slice(&self, substr: (char, char)) -> Option<crate::lib::std::ops::Range<usize>> { - let mut b = [0; 4]; - let substr0 = substr.0.encode_utf8(&mut b); - let mut b = [0; 4]; - let substr1 = substr.1.encode_utf8(&mut b); - self.find_slice((&*substr0, &*substr1)) + self.as_bytes().find_slice(substr) } } impl<'i> FindSlice<(char, char, char)> for &'i str { #[inline(always)] fn find_slice(&self, substr: (char, char, char)) -> Option<crate::lib::std::ops::Range<usize>> { - let mut b = [0; 4]; - let substr0 = substr.0.encode_utf8(&mut b); - let mut b = [0; 4]; - let substr1 = substr.1.encode_utf8(&mut b); - let mut b = [0; 4]; - let substr2 = substr.2.encode_utf8(&mut b); - self.find_slice((&*substr0, &*substr1, &*substr2)) + self.as_bytes().find_slice(substr) } } From 3fd0b76031ad1e776daa5150b67d00984771eeb9 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 12 Feb 2024 14:53:41 -0600 Subject: [PATCH 3/4] fix(ascii): Search for char newlines --- src/ascii/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ascii/mod.rs b/src/ascii/mod.rs index fdfa236a..824baee6 100644 --- a/src/ascii/mod.rs +++ b/src/ascii/mod.rs @@ -132,7 +132,7 @@ where I: StreamIsPartial, I: Stream, I: Compare<&'static str>, - I: FindSlice<(u8, u8)>, + I: FindSlice<(char, char)>, <I as Stream>::Token: AsChar + Clone, { trace("till_line_ending", move |input: &mut I| { @@ -152,10 +152,10 @@ where I: StreamIsPartial, I: Stream, I: Compare<&'static str>, - I: FindSlice<(u8, u8)>, + I: FindSlice<(char, char)>, <I as Stream>::Token: AsChar + Clone, { - let res = match take_until::<_, _, ()>(0.., (b'\r', b'\n')).parse_next(input) { + let res = match take_until::<_, _, ()>(0.., ('\r', '\n')).parse_next(input) { Ok(slice) => slice, Err(ErrMode::Incomplete(err)) => { return Err(ErrMode::Incomplete(err)); From ab480801572f21fbda516ded6af57eb4f2f0fe45 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 12 Feb 2024 14:59:40 -0600 Subject: [PATCH 4/4] fix(stream)!: Prevent slicing on non-UTF8 boundaries Fixes #460 --- 
src/stream/mod.rs | 56 ----------------------------------------------- 1 file changed, 56 deletions(-) diff --git a/src/stream/mod.rs b/src/stream/mod.rs index 96b2be84..87ff1580 100644 --- a/src/stream/mod.rs +++ b/src/stream/mod.rs @@ -2194,20 +2194,6 @@ impl<'a, 'b> Compare<AsciiCaseless<&'b str>> for &'a str { } } -impl<'a> Compare<u8> for &'a str { - #[inline(always)] - fn compare(&self, t: u8) -> CompareResult { - self.as_bytes().compare(t) - } -} - -impl<'a> Compare<AsciiCaseless<u8>> for &'a str { - #[inline(always)] - fn compare(&self, t: AsciiCaseless<u8>) -> CompareResult { - self.as_bytes().compare(t) - } -} - impl<'a> Compare<char> for &'a str { #[inline(always)] fn compare(&self, t: char) -> CompareResult { @@ -2222,20 +2208,6 @@ impl<'a> Compare<AsciiCaseless<char>> for &'a str { } } -impl<'a, 'b> Compare<&'b [u8]> for &'a str { - #[inline(always)] - fn compare(&self, t: &'b [u8]) -> CompareResult { - self.as_bytes().compare(t) - } -} - -impl<'a, 'b> Compare<AsciiCaseless<&'b [u8]>> for &'a str { - #[inline(always)] - fn compare(&self, t: AsciiCaseless<&'b [u8]>) -> CompareResult { - self.as_bytes().compare(t) - } -} - impl<'a, T> Compare<T> for &'a Bytes where &'a [u8]: Compare<T>, @@ -2507,34 +2479,6 @@ impl<'i> FindSlice<(char, char, char)> for &'i str { } } -impl<'i> FindSlice<u8> for &'i str { - #[inline(always)] - fn find_slice(&self, substr: u8) -> Option<crate::lib::std::ops::Range<usize>> { - self.as_bytes().find_slice(substr) - } -} - -impl<'i> FindSlice<(u8,)> for &'i str { - #[inline(always)] - fn find_slice(&self, substr: (u8,)) -> Option<crate::lib::std::ops::Range<usize>> { - self.as_bytes().find_slice(substr) - } -} - -impl<'i> FindSlice<(u8, u8)> for &'i str { - #[inline(always)] - fn find_slice(&self, substr: (u8, u8)) -> Option<crate::lib::std::ops::Range<usize>> { - self.as_bytes().find_slice(substr) - } -} - -impl<'i> FindSlice<(u8, u8, u8)> for &'i str { - #[inline(always)] - fn find_slice(&self, substr: (u8, u8, u8)) -> Option<crate::lib::std::ops::Range<usize>> { - self.as_bytes().find_slice(substr) - } -} - impl<'i, S> FindSlice<S> for &'i Bytes where &'i [u8]: FindSlice<S>,