-
Notifications
You must be signed in to change notification settings - Fork 53
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: implement Pratt parsing #614
base: main
Are you sure you want to change the base?
Changes from 15 commits
fed8c90
ee4459d
4b1499d
acf4577
a816a1c
2a80e65
29fe18d
5a4f4b4
919a1cb
0273a29
f218911
3d7ef41
a6cbc1a
29b64fa
b31a3a3
33c82f3
040dd85
6d88dff
161f9da
d53a32e
4f690db
44546f2
a583d24
431b6f6
482a162
81ba185
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
If this is going to start off unstable, then it's fine to note most of my feedback in the "tracking" issue and not resolve all of it here |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -166,6 +166,8 @@ mod multi; | |
mod parser; | ||
mod sequence; | ||
|
||
pub mod precedence; | ||
|
||
#[cfg(test)] | ||
mod tests; | ||
|
||
|
@@ -174,6 +176,7 @@ pub use self::core::*; | |
pub use self::debug::*; | ||
pub use self::multi::*; | ||
pub use self::parser::*; | ||
pub use self::precedence::*; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We are dumping a lot of stray types into |
||
pub use self::sequence::*; | ||
|
||
#[allow(unused_imports)] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
use crate::{ | ||
combinator::{opt, trace}, | ||
error::{ErrMode, ParserError}, | ||
stream::{Stream, StreamIsPartial}, | ||
PResult, Parser, | ||
}; | ||
|
||
/// Parses an expression based on operator precedence. | ||
/// | ||
/// Arguments, in order: | ||
/// - `operand`: parses a single operand. | ||
/// - `prefix`: parses a prefix operator, yielding `(power, fold)`. | ||
/// - `postfix`: parses a postfix operator, yielding `(power, fold)`. | ||
/// - `infix`: parses an infix operator, yielding `(left_power, right_power, fold)`. | ||
/// | ||
/// Each `fold` is a plain function pointer that receives the input stream and the | ||
/// already-parsed operand(s) and returns the combined `Operand`. | ||
/// | ||
/// The returned parser is wrapped in a `trace` named `"precedence"` and starts | ||
/// parsing with a minimum binding power of `0`. | ||
#[doc(alias = "pratt")] | ||
#[doc(alias = "separated")] | ||
#[doc(alias = "shunting_yard")] | ||
#[doc(alias = "precedence_climbing")] | ||
#[inline(always)] | ||
pub fn precedence<I, ParseOperand, ParseInfix, ParsePrefix, ParsePostfix, Operand: 'static, E>( | ||
mut operand: ParseOperand, | ||
mut prefix: ParsePrefix, | ||
mut postfix: ParsePostfix, | ||
mut infix: ParseInfix, | ||
) -> impl Parser<I, Operand, E> | ||
where | ||
I: Stream + StreamIsPartial, | ||
ParseOperand: Parser<I, Operand, E>, | ||
ParseInfix: Parser< | ||
I, | ||
( | ||
usize, | ||
usize, | ||
fn(&mut I, Operand, Operand) -> PResult<Operand, E>, | ||
), | ||
E, | ||
>, | ||
ParsePrefix: Parser<I, (usize, fn(&mut I, Operand) -> PResult<Operand, E>), E>, | ||
ParsePostfix: Parser<I, (usize, fn(&mut I, Operand) -> PResult<Operand, E>), E>, | ||
E: ParserError<I>, | ||
{ | ||
trace("precedence", move |i: &mut I| { | ||
// A minimum power of `0` is the lowest possible `usize`, so no operator is | ||
// rejected by the `power < min_power` checks at the top level. | ||
let result = precedence_impl(i, &mut operand, &mut prefix, &mut postfix, &mut infix, 0)?; | ||
Ok(result) | ||
}) | ||
} | ||
|
||
// Recursive worker behind `precedence`. | ||
// | ||
// Parses one operand (or a prefix operator followed by its operand), then keeps | ||
// folding postfix and infix operators into it for as long as their (left) power is | ||
// at least `min_power`. An operator whose power is below `min_power` is un-consumed | ||
// via a checkpoint reset and left for the caller further up the recursion. | ||
fn precedence_impl<I, ParseOperand, ParseInfix, ParsePrefix, ParsePostfix, Operand: 'static, E>( | ||
i: &mut I, | ||
parse_operand: &mut ParseOperand, | ||
prefix: &mut ParsePrefix, | ||
postfix: &mut ParsePostfix, | ||
infix: &mut ParseInfix, | ||
min_power: usize, | ||
) -> PResult<Operand, E> | ||
where | ||
I: Stream + StreamIsPartial, | ||
ParseOperand: Parser<I, Operand, E>, | ||
ParseInfix: Parser< | ||
I, | ||
( | ||
usize, | ||
usize, | ||
fn(&mut I, Operand, Operand) -> PResult<Operand, E>, | ||
), | ||
E, | ||
>, | ||
ParsePrefix: Parser<I, (usize, fn(&mut I, Operand) -> PResult<Operand, E>), E>, | ||
ParsePostfix: Parser<I, (usize, fn(&mut I, Operand) -> PResult<Operand, E>), E>, | ||
E: ParserError<I>, | ||
{ | ||
// Try a plain operand first; only when that yields nothing is a prefix operator required. | ||
let operand = opt(parse_operand.by_ref()).parse_next(i)?; | ||
let mut operand = if let Some(operand) = operand { | ||
operand | ||
} else { | ||
// Prefix unary operators | ||
let len = i.eof_offset(); | ||
let (power, fold_prefix) = prefix.parse_next(i)?; | ||
// infinite loop check: the parser must always consume | ||
if i.eof_offset() == len { | ||
return Err(ErrMode::assert(i, "`prefix` parsers must always consume")); | ||
} | ||
// The prefix operator's power becomes the minimum power for its operand, | ||
// so only operators binding at least as tightly are folded in before `fold_prefix` runs. | ||
let operand = precedence_impl(i, parse_operand, prefix, postfix, infix, power)?; | ||
fold_prefix(i, operand)? | ||
}; | ||
|
||
// NOTE(review): the loop condition only looks at `eof_offset()`; no `StreamIsPartial` | ||
// method is called in this function -- confirm the intended behavior for partial input. | ||
// NOTE(review): unlike `prefix`, there is no "must consume" guard for `postfix`/`infix` | ||
// below; a `postfix` parser that succeeds without consuming would presumably repeat | ||
// forever -- TODO confirm and guard if needed. | ||
'parse: while i.eof_offset() > 0 { | ||
// Postfix unary operators | ||
let start = i.checkpoint(); | ||
if let Some((power, fold_postfix)) = opt(postfix.by_ref()).parse_next(i)? { | ||
// control precedence over the prefix e.g.: | ||
// `--(i++)` or `(--i)++` | ||
if power < min_power { | ||
// Binds too loosely for this level: put the operator back and let the caller handle it. | ||
i.reset(&start); | ||
break; | ||
} | ||
operand = fold_postfix(i, operand)?; | ||
|
||
continue 'parse; | ||
} | ||
|
||
// Infix binary operators | ||
let start = i.checkpoint(); | ||
let parse_result = opt(infix.by_ref()).parse_next(i)?; | ||
if let Some((lpower, rpower, fold_infix)) = parse_result { | ||
// The left power decides whether this level may take the operator at all. | ||
if lpower < min_power { | ||
i.reset(&start); | ||
break; | ||
} | ||
// The right power is the minimum power for everything folded into the right-hand side. | ||
let rhs = precedence_impl(i, parse_operand, prefix, postfix, infix, rpower)?; | ||
operand = fold_infix(i, operand, rhs)?; | ||
|
||
continue 'parse; | ||
} | ||
|
||
// Neither a postfix nor an infix operator follows: the expression ends here. | ||
break 'parse; | ||
} | ||
|
||
Ok(operand) | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use crate::ascii::{digit1, space0}; | ||
use crate::combinator::{delimited, empty, fail, peek}; | ||
use crate::dispatch; | ||
use crate::error::ContextError; | ||
use crate::token::any; | ||
|
||
use super::*; | ||
|
||
// Test helper backing the postfix `!` operator: computes `x!` recursively. | ||
// The only base case is `x == 0`, so this is not intended for negative input. | ||
fn factorial(x: i32) -> i32 { | ||
if x == 0 { | ||
1 | ||
} else { | ||
x * factorial(x - 1) | ||
} | ||
} | ||
fn parser<'i>() -> impl Parser<&'i str, i32, ContextError> { | ||
move |i: &mut &str| { | ||
precedence( | ||
trace( | ||
"operand", | ||
delimited( | ||
space0, | ||
dispatch! {peek(any); | ||
'(' => delimited('(', parser(), ')'), | ||
_ => digit1.parse_to::<i32>() | ||
}, | ||
space0, | ||
), | ||
), | ||
trace( | ||
"prefix", | ||
dispatch! {any; | ||
'+' => empty.value((9, (|_: &mut _, a| Ok(a)) as _)), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I forgot to ask, why is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The compiler doesn't want to cast There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need it to be a function pointer rather than a I assume #618 (comment) is related. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, it is related. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we provided an There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I just checked. Yes, this simple guidance works: fn infix<I, O>(
a: Assoc,
f: fn(&mut I, O, O) -> PResult<O>,
) -> (
Assoc,
fn(&mut I, O, O) -> PResult<O>,
) {
(a, f)
}
...
"*".value(infix(Assoc::Right(28), |_: &mut _, a, b| Ok(Expr::Pow(Box::new(a), Box::new(b))))) |
||
'-' => empty.value((9, (|_: &mut _, a: i32| Ok(-a)) as _)), | ||
_ => fail | ||
}, | ||
), | ||
trace( | ||
"postfix", | ||
dispatch! {any; | ||
'!' => empty.value((9, (|_: &mut _, a| {Ok(factorial(a))}) as _)), | ||
_ => fail | ||
}, | ||
), | ||
trace( | ||
"infix", | ||
dispatch! {any; | ||
'+' => empty.value((5, 6, (|_: &mut _, a, b| Ok(a + b)) as _ )), | ||
'-' => empty.value((5, 6, (|_: &mut _, a, b| Ok(a - b)) as _)), | ||
'*' => empty.value((7, 8, (|_: &mut _, a, b| Ok(a * b)) as _)), | ||
'/' => empty.value((7, 8, (|_: &mut _, a, b| Ok(a / b)) as _)), | ||
'%' => empty.value((7, 8, (|_: &mut _, a, b| Ok(a % b)) as _)), | ||
'^' => empty.value((9, 10, (|_: &mut _, a, b| Ok(a ^ b)) as _)), | ||
_ => fail | ||
}, | ||
), | ||
) | ||
.parse_next(i) | ||
} | ||
} | ||
|
||
// Checks that binding powers produce the expected grouping: | ||
// - `-3!+-3 * 4`: postfix `!` (9) is not below the prefix `-` power (9), so `-3!` is -(3!) = -6; | ||
//   infix `*` (left power 7) is below 9, so `-3 * 4` is (-3) * 4 = -12; total -18. | ||
// - `+2 + 3 * 4`: `*` (7/8) binds tighter than `+` (5/6), giving 2 + 12 = 14. | ||
// - `2 * 3+4`: `+` (left power 5) is below `*`'s right power (8), giving 6 + 4 = 10. | ||
#[test] | ||
fn test_precedence() { | ||
assert_eq!(parser().parse("-3!+-3 * 4"), Ok(-18)); | ||
assert_eq!(parser().parse("+2 + 3 * 4"), Ok(14)); | ||
assert_eq!(parser().parse("2 * 3+4"), Ok(10)); | ||
} | ||
// Checks prefix and postfix operators on their own and next to an infix operator: | ||
// `-2` negates, `4!` is 24, `2 + 4!` applies `!` before `+` (2 + 24), | ||
// and `-2 + 2` applies the prefix `-` before `+` (-2 + 2). | ||
#[test] | ||
fn test_unary() { | ||
assert_eq!(parser().parse("-2"), Ok(-2)); | ||
assert_eq!(parser().parse("4!"), Ok(24)); | ||
assert_eq!(parser().parse("2 + 4!"), Ok(26)); | ||
assert_eq!(parser().parse("-2 + 2"), Ok(0)); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks to be agnostic of streaming support, like `separated` is