From 6978f311e5cb1f7c70ec0dad0df7ef149eef97ab Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 25 Jan 2024 20:50:36 -0600 Subject: [PATCH 1/2] fix(comb): Generalize repeat_till0 as repeat_till --- src/combinator/multi.rs | 142 +++++++++++++++++++++++++++++++++------- src/combinator/tests.rs | 40 +++++++++++ 2 files changed, 157 insertions(+), 25 deletions(-) diff --git a/src/combinator/multi.rs b/src/combinator/multi.rs index 926d41ee..006f8782 100644 --- a/src/combinator/multi.rs +++ b/src/combinator/multi.rs @@ -283,11 +283,11 @@ where /// # #[cfg(feature = "std")] { /// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; /// # use winnow::prelude::*; -/// use winnow::combinator::repeat_till0; +/// use winnow::combinator::repeat_till; /// use winnow::token::tag; /// /// fn parser(s: &str) -> IResult<&str, (Vec<&str>, &str)> { -/// repeat_till0("abc", "end").parse_peek(s) +/// repeat_till(0.., "abc", "end").parse_peek(s) /// }; /// /// assert_eq!(parser("abcabcend"), Ok(("", (vec!["abc", "abc"], "end")))); @@ -298,7 +298,11 @@ where /// # } /// ``` #[doc(alias = "many_till0")] -pub fn repeat_till0(mut f: F, mut g: G) -> impl Parser +pub fn repeat_till( + range: impl Into, + mut f: F, + mut g: G, +) -> impl Parser where I: Stream, C: Accumulate, @@ -306,34 +310,122 @@ where G: Parser, E: ParserError, { - trace("repeat_till0", move |i: &mut I| { - let mut res = C::initial(None); - loop { - let start = i.checkpoint(); - let len = i.eof_offset(); - match g.parse_next(i) { - Ok(o) => return Ok((res, o)), - Err(ErrMode::Backtrack(_)) => { - i.reset(start); - match f.parse_next(i) { - Err(e) => return Err(e.append(i, ErrorKind::Many)), - Ok(o) => { - // infinite loop check: the parser must always consume - if i.eof_offset() == len { - return Err(ErrMode::assert( - i, - "`repeat` parsers must always consume", - )); - } + let Range { + start_inclusive, + end_inclusive, + } = range.into(); + trace("repeat_till", move |i: &mut I| { + match (start_inclusive, end_inclusive) { + (0, None) => repeat_till0_(&mut f, &mut g, i), + (start, end) => repeat_till_m_n_(start, end.unwrap_or(usize::MAX), &mut f, &mut g, i), + } + }) +} - res.accumulate(o); +/// Deprecated, replaced with [`repeat_till`] +#[deprecated(since = "0.5.35", note = "Replaced with `repeat_till`")] +#[inline(always)] +pub fn repeat_till0(f: F, g: G) -> impl Parser +where + I: Stream, + C: Accumulate, + F: Parser, + G: Parser, + E: ParserError, +{ + repeat_till(0.., f, g) +} + +fn repeat_till0_(f: &mut F, g: &mut G, i: &mut I) -> PResult<(C, P), E> +where + I: Stream, + C: Accumulate, + F: Parser, + G: Parser, + E: ParserError, +{ + let mut res = C::initial(None); + loop { + let start = i.checkpoint(); + let len = i.eof_offset(); + match g.parse_next(i) { + Ok(o) => return Ok((res, o)), + Err(ErrMode::Backtrack(_)) => { + i.reset(start); + match f.parse_next(i) { + Err(e) => return Err(e.append(i, ErrorKind::Many)), + Ok(o) => { + // infinite loop check: the parser must always consume + if i.eof_offset() == len { + return Err(ErrMode::assert(i, "`repeat` parsers must always consume")); } + + res.accumulate(o); } } - Err(e) => return Err(e), } + Err(e) => return Err(e), } - }) + } +} + +fn repeat_till_m_n_( + min: usize, + max: usize, + f: &mut F, + g: &mut G, + i: &mut I, +) -> PResult<(C, P), E> +where + I: Stream, + C: Accumulate, + F: Parser, + G: Parser, + E: ParserError, +{ + if min > max { + return Err(ErrMode::Cut(E::from_error_kind(i, ErrorKind::Many))); + } + + let mut res = C::initial(Some(min)); + for _ in 0..min { + match f.parse_next(i) { + Ok(o) => { + res.accumulate(o); + } + Err(e) => { + return Err(e.append(i, ErrorKind::Many)); + } + } + } + for count in min..=max { + let start = i.checkpoint(); + let len = i.eof_offset(); + match g.parse_next(i) { + Ok(o) => return Ok((res, o)), + Err(ErrMode::Backtrack(err)) => { + if count == max { + return Err(ErrMode::Backtrack(err)); + } + i.reset(start); + match f.parse_next(i) { + Err(e) => { + return Err(e.append(i, ErrorKind::Many)); + } + Ok(o) => { + // infinite loop check: the parser must always consume + if i.eof_offset() == len { + return Err(ErrMode::assert(i, "`repeat` parsers must always consume")); + } + + res.accumulate(o); + } + } + } + Err(e) => return Err(e), + } + } + unreachable!() } /// [`Accumulate`] the output of a parser, interleaved with `sep` diff --git a/src/combinator/tests.rs b/src/combinator/tests.rs index 27fa5345..92cfec92 100644 --- a/src/combinator/tests.rs +++ b/src/combinator/tests.rs @@ -987,6 +987,46 @@ fn repeat_till_test() { ); } +#[test] +#[cfg(feature = "alloc")] +fn repeat_till_range_test() { + #[allow(clippy::type_complexity)] + fn multi(i: &str) -> IResult<&str, (Vec<&str>, &str)> { + repeat_till(2..=4, "ab", "cd").parse_peek(i) + } + + assert_eq!( + multi("cd"), + Err(ErrMode::Backtrack(error_node_position!( + &"cd", + ErrorKind::Many, + error_position!(&"cd", ErrorKind::Tag) + ))) + ); + assert_eq!( + multi("abcd"), + Err(ErrMode::Backtrack(error_node_position!( + &"cd", + ErrorKind::Many, + error_position!(&"cd", ErrorKind::Tag) + ))) + ); + assert_eq!(multi("ababcd"), Ok(("", (vec!["ab", "ab"], "cd")))); + assert_eq!(multi("abababcd"), Ok(("", (vec!["ab", "ab", "ab"], "cd")))); + assert_eq!( + multi("ababababcd"), + Ok(("", (vec!["ab", "ab", "ab", "ab"], "cd"))) + ); + assert_eq!( + multi("abababababcd"), + Err(ErrMode::Backtrack(error_node_position!( + &"cd", + ErrorKind::Many, + error_position!(&"abcd", ErrorKind::Tag) + ))) + ); +} + #[test] #[cfg(feature = "std")] fn infinite_many() { From eb5426991953fe2d343ea6f5e3ac810168fd41c5 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 25 Jan 2024 20:51:59 -0600 Subject: [PATCH 2/2] refactor: Resolve repeat_till1 deprecations --- src/combinator/mod.rs | 2 +- src/combinator/tests.rs | 2 +- tests/testsuite/overflow.rs | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/combinator/mod.rs b/src/combinator/mod.rs index 252c36af..da5fa792 100644 --- a/src/combinator/mod.rs +++ b/src/combinator/mod.rs @@ -40,7 +40,7 @@ //! | combinator | usage | input | new input | output | comment | //! |---|---|---|---|---|---| //! | [`repeat`] | `repeat(1..=3, "ab")` | `"ababc"` | `"c"` | `Ok(vec!["ab", "ab"])` |Applies the parser between m and n times (n included) and returns the list of results in a Vec| -//! | [`repeat_till0`] | `repeat_till0(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `"g"` | `Ok((vec!["ab", "ab"], "ef"))` |Applies the first parser until the second applies. Returns a tuple containing the list of results from the first in a Vec and the result of the second| +//! | [`repeat_till`] | `repeat_till(0.., tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `"g"` | `Ok((vec!["ab", "ab"], "ef"))` |Applies the first parser until the second applies. Returns a tuple containing the list of results from the first in a Vec and the result of the second| //! | [`separated`] | `separated(1..=3, "ab", ",")` | `"ab,ab,ab."` | `"."` | `Ok(vec!["ab", "ab", "ab"])` |Applies the parser and separator between m and n times (n included) and returns the list of results in a Vec| //! | [`fold_repeat`] | `fold_repeat(1..=2, be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `[3]` | `Ok(3)` |Applies the parser between m and n times (n included) and folds the list of return value| //! diff --git a/src/combinator/tests.rs b/src/combinator/tests.rs index 92cfec92..213d17d9 100644 --- a/src/combinator/tests.rs +++ b/src/combinator/tests.rs @@ -966,7 +966,7 @@ fn repeat1_test() { fn repeat_till_test() { #[allow(clippy::type_complexity)] fn multi(i: &[u8]) -> IResult<&[u8], (Vec<&[u8]>, &[u8])> { - repeat_till0("abcd", "efgh").parse_peek(i) + repeat_till(0.., "abcd", "efgh").parse_peek(i) } let a = b"abcdabcdefghabcd"; diff --git a/tests/testsuite/overflow.rs b/tests/testsuite/overflow.rs index c7339e55..9126808a 100644 --- a/tests/testsuite/overflow.rs +++ b/tests/testsuite/overflow.rs @@ -81,14 +81,14 @@ fn overflow_incomplete_many1() { #[test] #[cfg(feature = "alloc")] fn overflow_incomplete_many_till0() { - use winnow::combinator::repeat_till0; + use winnow::combinator::repeat_till; #[allow(clippy::type_complexity)] fn multi(i: Partial<&[u8]>) -> IResult, (Vec<&[u8]>, &[u8])> { - repeat_till0(length_take(be_u64), "abc").parse_peek(i) + repeat_till(0.., length_take(be_u64), "abc").parse_peek(i) } - // Trigger an overflow in repeat_till0 + // Trigger an overflow in repeat_till assert_eq!( multi(Partial::new( &b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xef"[..]