From 76d691416aed1e65af8131779998ebb139d586e2 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 30 Jan 2025 08:02:02 -0600 Subject: [PATCH 1/2] feat(token): Add 'peek_any' This is a pattern that comes up enough that it's worth optimizing --- examples/arithmetic/parser_lexer.rs | 19 ++++++------ examples/json/parser_dispatch.rs | 22 +++++++------- src/ascii/mod.rs | 8 ++--- src/combinator/core.rs | 2 ++ src/token/mod.rs | 46 +++++++++++++++++++++++++++++ 5 files changed, 72 insertions(+), 25 deletions(-) diff --git a/examples/arithmetic/parser_lexer.rs b/examples/arithmetic/parser_lexer.rs index 8bc2c685..c9b3dfe1 100644 --- a/examples/arithmetic/parser_lexer.rs +++ b/examples/arithmetic/parser_lexer.rs @@ -10,11 +10,10 @@ use winnow::{ combinator::alt, combinator::dispatch, combinator::fail, - combinator::peek, combinator::repeat, combinator::{delimited, preceded, terminated}, - token::any, token::one_of, + token::peek_any, }; #[derive(Debug, Clone)] @@ -109,14 +108,14 @@ pub(crate) fn lex(i: &mut &str) -> Result> { } fn token(i: &mut &str) -> Result { - dispatch! {peek(any); - '0'..='9' => digits.try_map(FromStr::from_str).map(Token::Value), - '(' => '('.value(Token::OpenParen), - ')' => ')'.value(Token::CloseParen), - '+' => '+'.value(Token::Oper(Oper::Add)), - '-' => '-'.value(Token::Oper(Oper::Sub)), - '*' => '*'.value(Token::Oper(Oper::Mul)), - '/' => '/'.value(Token::Oper(Oper::Div)), + dispatch! 
{peek_any; + Some('0'..='9') => digits.try_map(FromStr::from_str).map(Token::Value), + Some('(') => '('.value(Token::OpenParen), + Some(')') => ')'.value(Token::CloseParen), + Some('+') => '+'.value(Token::Oper(Oper::Add)), + Some('-') => '-'.value(Token::Oper(Oper::Sub)), + Some('*') => '*'.value(Token::Oper(Oper::Mul)), + Some('/') => '/'.value(Token::Oper(Oper::Div)), _ => fail, } .parse_next(i) diff --git a/examples/json/parser_dispatch.rs b/examples/json/parser_dispatch.rs index b62d33de..4cf04c97 100644 --- a/examples/json/parser_dispatch.rs +++ b/examples/json/parser_dispatch.rs @@ -7,11 +7,11 @@ use winnow::{ ascii::float, combinator::empty, combinator::fail, - combinator::peek, combinator::{alt, dispatch}, combinator::{delimited, preceded, separated_pair, terminated}, combinator::{repeat, separated}, error::{AddContext, ParserError, StrContext}, + token::peek_any, token::{any, none_of, take, take_while}, }; @@ -43,16 +43,16 @@ fn json_value<'i, E: ParserError> + AddContext, StrContext ) -> Result { // `dispatch` gives you `match`-like behavior compared to `alt` successively trying different // implementations. 
- dispatch!(peek(any); - 'n' => null.value(JsonValue::Null), - 't' => true_.map(JsonValue::Boolean), - 'f' => false_.map(JsonValue::Boolean), - '"' => string.map(JsonValue::Str), - '+' => float.map(JsonValue::Num), - '-' => float.map(JsonValue::Num), - '0'..='9' => float.map(JsonValue::Num), - '[' => array.map(JsonValue::Array), - '{' => object.map(JsonValue::Object), + dispatch!(peek_any; + Some('n') => null.value(JsonValue::Null), + Some('t') => true_.map(JsonValue::Boolean), + Some('f') => false_.map(JsonValue::Boolean), + Some('"') => string.map(JsonValue::Str), + Some('+') => float.map(JsonValue::Num), + Some('-') => float.map(JsonValue::Num), + Some('0'..='9') => float.map(JsonValue::Num), + Some('[') => array.map(JsonValue::Array), + Some('{') => object.map(JsonValue::Object), _ => fail, ) .parse_next(input) diff --git a/src/ascii/mod.rs b/src/ascii/mod.rs index d48a42a3..c1978e48 100644 --- a/src/ascii/mod.rs +++ b/src/ascii/mod.rs @@ -12,7 +12,6 @@ use crate::combinator::dispatch; use crate::combinator::empty; use crate::combinator::fail; use crate::combinator::opt; -use crate::combinator::peek; use crate::combinator::trace; use crate::error::Needed; use crate::error::ParserError; @@ -21,6 +20,7 @@ use crate::stream::{AsBStr, AsChar, ParseSlice, Stream, StreamIsPartial}; use crate::stream::{Compare, CompareResult}; use crate::token::any; use crate::token::one_of; +use crate::token::peek_any; use crate::token::take_until; use crate::token::take_while; use crate::Parser; @@ -1489,7 +1489,7 @@ where ::IterOffsets: Clone, I: AsBStr, { - dispatch! {opt(peek(any).map(AsChar::as_char)); + dispatch! {peek_any.map(|t| t.map(AsChar::as_char)); Some('N') | Some('n') => Caseless("nan").void(), Some('+') | Some('-') => (any, take_unsigned_float_or_exceptions).void(), _ => take_unsigned_float_or_exceptions, @@ -1509,7 +1509,7 @@ where ::IterOffsets: Clone, I: AsBStr, { - dispatch! {opt(peek(any).map(AsChar::as_char)); + dispatch! 
{peek_any.map(|t| t.map(AsChar::as_char)); Some('I') | Some('i') => (Caseless("inf"), opt(Caseless("inity"))).void(), Some('.') => ('.', digit1, take_exp).void(), _ => (digit1, opt(('.', opt(digit1))), take_exp).void(), @@ -1527,7 +1527,7 @@ where ::IterOffsets: Clone, I: AsBStr, { - dispatch! {opt(peek(any).map(AsChar::as_char)); + dispatch! {peek_any.map(|t| t.map(AsChar::as_char)); Some('E') | Some('e') => (one_of(['e', 'E']), opt(one_of(['+', '-'])), digit1).void(), _ => empty, } diff --git a/src/combinator/core.rs b/src/combinator/core.rs index 2c77c6fc..5d548ab7 100644 --- a/src/combinator/core.rs +++ b/src/combinator/core.rs @@ -88,6 +88,8 @@ where /// /// To lookahead and only advance on success, see [`opt`]. /// +/// To peek just a token (i.e. `peek(opt(any))`), see [`peek_token`][crate::token::peek_token]. +/// /// # Example /// /// ```rust diff --git a/src/token/mod.rs b/src/token/mod.rs index 2cb84938..bfc7431d 100644 --- a/src/token/mod.rs +++ b/src/token/mod.rs @@ -82,6 +82,52 @@ where }) } +/// Get the next token, if present, without advancing the input +/// +/// To peek with a [`Parser`], see [`peek`][crate::combinator::peek]. 
+/// +/// # Effective Signature +/// +/// Assuming you are parsing a `&str` [Stream]: +/// ```rust +/// # use winnow::prelude::*;; +/// pub fn peek_any(input: &mut &str) -> ModalResult> +/// # { +/// # winnow::token::peek_any.parse_next(input) +/// # } +/// ``` +/// +/// # Example +/// +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::combinator::dispatch; +/// # use winnow::combinator::fail; +/// use winnow::token::peek_any; +/// +/// fn bool<'i>(input: &mut &'i str) -> ModalResult<&'i str> { +/// dispatch!(peek_any; +/// Some('t') => "true", +/// Some('f') => "false", +/// _ => fail, +/// ) +/// .parse_next(input) +/// } +/// +/// assert_eq!(bool.parse_peek("true;"), Ok((";", "true"))); +/// assert!(bool.parse_peek("123;").is_err()); +/// ``` +#[doc(alias = "look_ahead")] +#[doc(alias = "rewind")] +#[doc(alias = "peek_token")] +pub fn peek_any(input: &mut Input) -> Result::Token>, Error> +where + Input: Stream, + Error: ParserError, +{ + trace("peek_any", move |input: &mut Input| Ok(input.peek_token())).parse_next(input) +} + /// Recognizes a literal /// /// The input data will be compared to the literal combinator's argument and will return the part of From bb5f365e00130437a63a0d199f517960730b1d3c Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 30 Jan 2025 08:57:33 -0600 Subject: [PATCH 2/2] fix(token): Make peek_any consistent with peek --- examples/arithmetic/parser_lexer.rs | 14 +++++++------- examples/json/parser_dispatch.rs | 18 +++++++++--------- src/ascii/mod.rs | 6 +++--- src/combinator/core.rs | 2 +- src/token/mod.rs | 21 +++++++++++++++------ 5 files changed, 35 insertions(+), 26 deletions(-) diff --git a/examples/arithmetic/parser_lexer.rs b/examples/arithmetic/parser_lexer.rs index c9b3dfe1..e36b3264 100644 --- a/examples/arithmetic/parser_lexer.rs +++ b/examples/arithmetic/parser_lexer.rs @@ -109,13 +109,13 @@ pub(crate) fn lex(i: &mut &str) -> Result> { fn token(i: &mut &str) -> Result { dispatch! 
{peek_any; - Some('0'..='9') => digits.try_map(FromStr::from_str).map(Token::Value), - Some('(') => '('.value(Token::OpenParen), - Some(')') => ')'.value(Token::CloseParen), - Some('+') => '+'.value(Token::Oper(Oper::Add)), - Some('-') => '-'.value(Token::Oper(Oper::Sub)), - Some('*') => '*'.value(Token::Oper(Oper::Mul)), - Some('/') => '/'.value(Token::Oper(Oper::Div)), + '0'..='9' => digits.try_map(FromStr::from_str).map(Token::Value), + '(' => '('.value(Token::OpenParen), + ')' => ')'.value(Token::CloseParen), + '+' => '+'.value(Token::Oper(Oper::Add)), + '-' => '-'.value(Token::Oper(Oper::Sub)), + '*' => '*'.value(Token::Oper(Oper::Mul)), + '/' => '/'.value(Token::Oper(Oper::Div)), _ => fail, } .parse_next(i) diff --git a/examples/json/parser_dispatch.rs b/examples/json/parser_dispatch.rs index 4cf04c97..a1e7831f 100644 --- a/examples/json/parser_dispatch.rs +++ b/examples/json/parser_dispatch.rs @@ -44,15 +44,15 @@ fn json_value<'i, E: ParserError> + AddContext, StrContext // `dispatch` gives you `match`-like behavior compared to `alt` successively trying different // implementations. 
dispatch!(peek_any; - Some('n') => null.value(JsonValue::Null), - Some('t') => true_.map(JsonValue::Boolean), - Some('f') => false_.map(JsonValue::Boolean), - Some('"') => string.map(JsonValue::Str), - Some('+') => float.map(JsonValue::Num), - Some('-') => float.map(JsonValue::Num), - Some('0'..='9') => float.map(JsonValue::Num), - Some('[') => array.map(JsonValue::Array), - Some('{') => object.map(JsonValue::Object), + 'n' => null.value(JsonValue::Null), + 't' => true_.map(JsonValue::Boolean), + 'f' => false_.map(JsonValue::Boolean), + '"' => string.map(JsonValue::Str), + '+' => float.map(JsonValue::Num), + '-' => float.map(JsonValue::Num), + '0'..='9' => float.map(JsonValue::Num), + '[' => array.map(JsonValue::Array), + '{' => object.map(JsonValue::Object), _ => fail, ) .parse_next(input) diff --git a/src/ascii/mod.rs b/src/ascii/mod.rs index c1978e48..81fa7f60 100644 --- a/src/ascii/mod.rs +++ b/src/ascii/mod.rs @@ -1489,7 +1489,7 @@ where ::IterOffsets: Clone, I: AsBStr, { - dispatch! {peek_any.map(|t| t.map(AsChar::as_char)); + dispatch! {opt(peek_any.map(AsChar::as_char)); Some('N') | Some('n') => Caseless("nan").void(), Some('+') | Some('-') => (any, take_unsigned_float_or_exceptions).void(), _ => take_unsigned_float_or_exceptions, @@ -1509,7 +1509,7 @@ where ::IterOffsets: Clone, I: AsBStr, { - dispatch! {peek_any.map(|t| t.map(AsChar::as_char)); + dispatch! {opt(peek_any.map(AsChar::as_char)); Some('I') | Some('i') => (Caseless("inf"), opt(Caseless("inity"))).void(), Some('.') => ('.', digit1, take_exp).void(), _ => (digit1, opt(('.', opt(digit1))), take_exp).void(), @@ -1527,7 +1527,7 @@ where ::IterOffsets: Clone, I: AsBStr, { - dispatch! {peek_any.map(|t| t.map(AsChar::as_char)); + dispatch! 
{opt(peek_any.map(AsChar::as_char)); Some('E') | Some('e') => (one_of(['e', 'E']), opt(one_of(['+', '-'])), digit1).void(), _ => empty, } diff --git a/src/combinator/core.rs b/src/combinator/core.rs index 5d548ab7..d6f11820 100644 --- a/src/combinator/core.rs +++ b/src/combinator/core.rs @@ -88,7 +88,7 @@ where /// /// To lookahead and only advance on success, see [`opt`]. /// -/// To peek just a token (i.e. `peek(opt(any))`), see [`peek_token`][crate::token::peek_token]. +/// To peek just a token (i.e. `peek(any)`), see [`peek_any`][crate::token::peek_any]. /// /// # Example /// diff --git a/src/token/mod.rs b/src/token/mod.rs index bfc7431d..082fa172 100644 --- a/src/token/mod.rs +++ b/src/token/mod.rs @@ -91,7 +91,7 @@ where /// Assuming you are parsing a `&str` [Stream]: /// ```rust /// # use winnow::prelude::*;; -/// pub fn peek_any(input: &mut &str) -> ModalResult> +/// pub fn peek_any(input: &mut &str) -> ModalResult /// # { /// # winnow::token::peek_any.parse_next(input) /// # } @@ -107,8 +107,8 @@ where /// /// fn bool<'i>(input: &mut &'i str) -> ModalResult<&'i str> { /// dispatch!(peek_any; -/// Some('t') => "true", -/// Some('f') => "false", +/// 't' => "true", +/// 'f' => "false", /// _ => fail, /// ) /// .parse_next(input) @@ -120,12 +120,21 @@ where #[doc(alias = "look_ahead")] #[doc(alias = "rewind")] #[doc(alias = "peek_token")] -pub fn peek_any(input: &mut Input) -> Result::Token>, Error> +pub fn peek_any(input: &mut Input) -> Result<::Token, Error> where - Input: Stream, + Input: StreamIsPartial + Stream, Error: ParserError, { - trace("peek_any", move |input: &mut Input| Ok(input.peek_token())).parse_next(input) + trace("peek_any", move |input: &mut Input| { + input.peek_token().ok_or_else(|| { + if ::is_partial_supported() && input.is_partial() { + ParserError::incomplete(input, Needed::new(1)) + } else { + ParserError::from_input(input) + } + }) + }) + .parse_next(input) } /// Recognizes a literal