From 5ac61e22812ba436f26aea544727db773883bd2b Mon Sep 17 00:00:00 2001
From: Ed Page
Date: Thu, 7 Dec 2023 10:15:03 -0600
Subject: [PATCH] WIP: feat: Error recovery support

Fixes #96
---
 src/stream/mod.rs | 434 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 434 insertions(+)

diff --git a/src/stream/mod.rs b/src/stream/mod.rs
index ea910e71..7420fd60 100644
--- a/src/stream/mod.rs
+++ b/src/stream/mod.rs
@@ -148,6 +148,56 @@ impl crate::lib::std::fmt::Display for Located
     }
 }
 
+/// Allow recovering from parse errors, capturing them as the parser continues
+#[derive(Clone, Default, Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Recoverable {
+    input: I, // the wrapped stream; the trait impls in this patch forward to it
+    errors: Vec, // captured errors; starts out empty in both constructors
+    is_recoverable: bool, // `true` from `new`, `false` from `unrecoverable`
+}
+
+impl Recoverable {
+    /// Track recoverable errors with the stream
+    pub fn new(input: I) -> Self {
+        Self {
+            input,
+            errors: Default::default(),
+            is_recoverable: true,
+        }
+    }
+
+    /// Act as a normal stream
+    pub fn unrecoverable(input: I) -> Self {
+        Self {
+            input,
+            errors: Default::default(),
+            is_recoverable: false, // the only field that differs from `new`
+        }
+    }
+}
+
+impl AsRef for Recoverable {
+    #[inline(always)]
+    fn as_ref(&self) -> &I {
+        &self.input // borrow the wrapped stream
+    }
+}
+
+impl crate::lib::std::ops::Deref for Recoverable {
+    type Target = I; // dereferences to the wrapped stream
+
+    #[inline(always)]
+    fn deref(&self) -> &Self::Target {
+        &self.input
+    }
+}
+
+impl crate::lib::std::fmt::Display for Recoverable {
+    fn fmt(&self, f: &mut crate::lib::std::fmt::Formatter<'_>) -> crate::lib::std::fmt::Result {
+        self.input.fmt(f) // formatting is delegated to the wrapped stream
+    }
+}
+
 /// Thread global state through your parsers
 ///
 /// Use cases
@@ -415,6 +465,16 @@ where
     }
 }
 
+impl SliceLen for Recoverable
+where
+    I: SliceLen,
+{
+    #[inline(always)]
+    fn slice_len(&self) -> usize {
+        self.input.slice_len() // length comes from the wrapped stream
+    }
+}
+
 impl SliceLen for Stateful
 where
     I: SliceLen,
@@ -982,6 +1042,59 @@ impl Stream for Located {
     }
 }
 
+impl Stream for Recoverable { // every method below forwards to `self.input`
+    type Token = ::Token;
+    type Slice = ::Slice;
+
+    type IterOffsets = ::IterOffsets;
+
+    type Checkpoint = Checkpoint;
+
+    #[inline(always)]
+    fn iter_offsets(&self) -> Self::IterOffsets {
+        self.input.iter_offsets()
+    }
+    #[inline(always)]
+    fn eof_offset(&self) -> usize {
+        self.input.eof_offset()
+    }
+
+    #[inline(always)]
+    fn next_token(&mut self) -> Option {
+        self.input.next_token()
+    }
+
+    #[inline(always)]
+    fn offset_for(&self, predicate: P) -> Option
+    where
+        P: Fn(Self::Token) -> bool,
+    {
+        self.input.offset_for(predicate)
+    }
+    #[inline(always)]
+    fn offset_at(&self, tokens: usize) -> Result {
+        self.input.offset_at(tokens)
+    }
+    #[inline(always)]
+    fn next_slice(&mut self, offset: usize) -> Self::Slice {
+        self.input.next_slice(offset)
+    }
+
+    #[inline(always)]
+    fn checkpoint(&self) -> Self::Checkpoint {
+        Checkpoint(self.input.checkpoint()) // wraps the inner stream's checkpoint
+    }
+    #[inline(always)]
+    fn reset(&mut self, checkpoint: Self::Checkpoint) {
+        self.input.reset(checkpoint.0); // rewinds the input only; `errors` is left untouched
+    }
+
+    #[inline(always)]
+    fn raw(&self) -> &dyn crate::lib::std::fmt::Debug {
+        &self.input // debug view of the wrapped stream, without `errors`
+    }
+}
+
 impl Stream for Stateful {
     type Token = ::Token;
     type Slice = ::Slice;
@@ -1104,6 +1217,16 @@ where
     }
 }
 
+impl Location for Recoverable
+where
+    I: Location,
+{
+    #[inline(always)]
+    fn location(&self) -> usize {
+        self.input.location() // location comes from the wrapped stream
+    }
+}
+
 impl Location for Stateful
 where
     I: Location,
@@ -1124,6 +1247,212 @@ where
     }
 }
 
+/// Capture top-level errors in the middle of parsing so parsing can resume
+pub trait Recover {
+    /// Capture a top-level error
+    ///
+    /// May return `err` if recovery is not possible (e.g. if [`Recover::is_recovery_supported`]
+    /// returns `false`).
+    fn record_err(
+        &mut self,
+        token_start: I::Checkpoint, // presumably where the failed token began — TODO confirm
+        err_start: I::Checkpoint, // presumably where the error was detected — TODO confirm
+        err: E,
+    ) -> Result<(), E>;
+
+    /// Report whether the [`Stream`] can save off errors for recovery
+    fn is_recovery_supported() -> bool;
+}
+
+impl<'a, T, E> Recover for &'a [T]
+where
+    &'a [T]: Stream,
+{
+    #[inline(always)]
+    fn record_err(
+        &mut self,
+        _token_start: ::Checkpoint,
+        _err_start: ::Checkpoint,
+        err: E,
+    ) -> Result<(), E> {
+        Err(err) // recovery unsupported: the error is returned unchanged
+    }
+
+    /// Report whether the [`Stream`] can save off errors for recovery
+    #[inline(always)]
+    fn is_recovery_supported() -> bool {
+        false
+    }
+}
+
+impl<'a, E> Recover for &'a str {
+    #[inline(always)]
+    fn record_err(
+        &mut self,
+        _token_start: ::Checkpoint,
+        _err_start: ::Checkpoint,
+        err: E,
+    ) -> Result<(), E> {
+        Err(err) // recovery unsupported: the error is returned unchanged
+    }
+
+    /// Report whether the [`Stream`] can save off errors for recovery
+    #[inline(always)]
+    fn is_recovery_supported() -> bool {
+        false
+    }
+}
+
+impl<'a, E> Recover for &'a Bytes {
+    #[inline(always)]
+    fn record_err(
+        &mut self,
+        _token_start: ::Checkpoint,
+        _err_start: ::Checkpoint,
+        err: E,
+    ) -> Result<(), E> {
+        Err(err) // recovery unsupported: the error is returned unchanged
+    }
+
+    /// Report whether the [`Stream`] can save off errors for recovery
+    #[inline(always)]
+    fn is_recovery_supported() -> bool {
+        false
+    }
+}
+
+impl<'a, E> Recover for &'a BStr {
+    #[inline(always)]
+    fn record_err(
+        &mut self,
+        _token_start: ::Checkpoint,
+        _err_start: ::Checkpoint,
+        err: E,
+    ) -> Result<(), E> {
+        Err(err) // recovery unsupported: the error is returned unchanged
+    }
+
+    /// Report whether the [`Stream`] can save off errors for recovery
+    #[inline(always)]
+    fn is_recovery_supported() -> bool {
+        false
+    }
+}
+
+impl Recover for (I, usize)
+where
+    I: Recover,
+    I: Stream + Clone,
+{
+    #[inline(always)]
+    fn record_err(
+        &mut self,
+        _token_start: ::Checkpoint,
+        _err_start: ::Checkpoint,
+        err: E,
+    ) -> Result<(), E> {
+        Err(err) // NOTE(review): never forwards to `I`, although `I: Recover` — confirm intended
+    }
+
+    /// Report whether the [`Stream`] can save off errors for recovery
+    #[inline(always)]
+    fn is_recovery_supported() -> bool {
+        false
+    }
+}
+
+impl Recover for Located
+where
+    I: Recover,
+    I: Stream,
+{
+    #[inline(always)]
+    fn record_err(
+        &mut self,
+        _token_start: ::Checkpoint,
+        _err_start: ::Checkpoint,
+        err: E,
+    ) -> Result<(), E> {
+        Err(err) // recovery unsupported: the error is returned unchanged
+    }
+
+    /// Report whether the [`Stream`] can save off errors for recovery
+    #[inline(always)]
+    fn is_recovery_supported() -> bool {
+        false
+    }
+}
+
+impl Recover for Recoverable
+where
+    I: Recover,
+    I: Stream,
+{
+    fn record_err(
+        &mut self,
+        token_start: ::Checkpoint, // not stored yet; only `err` itself is kept
+        err_start: ::Checkpoint, // not stored yet; only `err` itself is kept
+        err: E,
+    ) -> Result<(), E> { // Ok(()) once the error is stored; Err(err) when recovery is off
+        if !self.is_recoverable {
+            return Err(err); // built via `unrecoverable`: behave like a normal stream
+        }
+        self.errors.push(err); // assumes `errors` holds `E` values — TODO confirm
+        Ok(())
+    }
+
+    /// Report whether the [`Stream`] can save off errors for recovery
+    #[inline(always)]
+    fn is_recovery_supported() -> bool {
+        true
+    }
+}
+
+impl Recover for Stateful
+where
+    I: Recover,
+    I: Stream,
+    S: Clone + crate::lib::std::fmt::Debug,
+{
+    #[inline(always)]
+    fn record_err(
+        &mut self,
+        _token_start: ::Checkpoint,
+        _err_start: ::Checkpoint,
+        err: E,
+    ) -> Result<(), E> {
+        Err(err) // recovery unsupported: the error is returned unchanged
+    }
+
+    /// Report whether the [`Stream`] can save off errors for recovery
+    #[inline(always)]
+    fn is_recovery_supported() -> bool {
+        false
+    }
+}
+
+impl Recover for Partial
+where
+    I: Recover,
+    I: Stream,
+{
+    #[inline(always)]
+    fn record_err(
+        &mut self,
+        _token_start: ::Checkpoint,
+        _err_start: ::Checkpoint,
+        err: E,
+    ) -> Result<(), E> {
+        Err(err) // recovery unsupported: the error is returned unchanged
+    }
+
+    /// Report whether the [`Stream`] can save off errors for recovery
+    #[inline(always)]
+    fn is_recovery_supported() -> bool {
+        false
+    }
+}
+
 /// Marks the input as being the complete buffer or a partial buffer for streaming input
 ///
 /// See [`Partial`] for marking a presumed complete buffer type as a streaming buffer.
@@ -1256,6 +1585,31 @@ where
     }
 }
 
+impl StreamIsPartial for Recoverable
+where
+    I: StreamIsPartial,
+{
+    type PartialState = I::PartialState; // same state type as the wrapped stream
+
+    fn complete(&mut self) -> Self::PartialState {
+        self.input.complete()
+    }
+
+    fn restore_partial(&mut self, state: Self::PartialState) {
+        self.input.restore_partial(state);
+    }
+
+    #[inline(always)]
+    fn is_partial_supported() -> bool {
+        I::is_partial_supported() // decided by the wrapped stream type
+    }
+
+    #[inline(always)]
+    fn is_partial(&self) -> bool {
+        self.input.is_partial()
+    }
+}
+
 impl StreamIsPartial for Stateful
 where
     I: StreamIsPartial,
@@ -1421,6 +1775,28 @@ where
     }
 }
 
+impl Offset for Recoverable
+where
+    I: Stream,
+    E: crate::lib::std::fmt::Debug,
+{
+    #[inline(always)]
+    fn offset_from(&self, other: &Self) -> usize {
+        self.offset_from(&other.checkpoint()) // compare against a checkpoint taken from `other`
+    }
+}
+
+impl Offset< as Stream>::Checkpoint> for Recoverable
+where
+    I: Stream,
+    E: crate::lib::std::fmt::Debug,
+{
+    #[inline(always)]
+    fn offset_from(&self, other: & as Stream>::Checkpoint) -> usize {
+        self.checkpoint().offset_from(other) // delegate to the checkpoint's own `offset_from`
+    }
+}
+
 impl Offset for Stateful
 where
     I: Stream,
@@ -1503,6 +1879,16 @@ where
     }
 }
 
+impl AsBytes for Recoverable
+where
+    I: AsBytes,
+{
+    #[inline(always)]
+    fn as_bytes(&self) -> &[u8] {
+        self.input.as_bytes() // bytes come from the wrapped stream
+    }
+}
+
 impl AsBytes for Stateful
 where
     I: AsBytes,
@@ -1560,6 +1946,16 @@ where
     }
 }
 
+impl AsBStr for Recoverable
+where
+    I: AsBStr,
+{
+    #[inline(always)]
+    fn as_bstr(&self) -> &[u8] {
+        self.input.as_bstr() // bytes come from the wrapped stream
+    }
+}
+
 impl AsBStr for Stateful
 where
     I: AsBStr,
@@ -1878,6 +2274,22 @@ where
     }
 }
 
+impl Compare for Recoverable
+where
+    I: Compare,
+{
+    #[inline(always)]
+    fn compare(&self, other: U) -> CompareResult {
+        self.input.compare(other)
+    }
+
+    #[inline(always)]
+    #[allow(deprecated)] // presumably the forwarded `compare_no_case` is deprecated — confirm
+    fn compare_no_case(&self, other: U) -> CompareResult {
+        self.input.compare_no_case(other)
+    }
+}
+
 impl Compare for Stateful
 where
     I: Compare,
@@ -2139,6 +2551,16 @@ where
     }
 }
 
+impl FindSlice for Recoverable
+where
+    I: FindSlice,
+{
+    #[inline(always)]
+    fn find_slice(&self, substr: T) -> Option {
+        self.input.find_slice(substr) // search is done by the wrapped stream
+    }
+}
+
 impl FindSlice for Stateful
 where
     I: FindSlice,
@@ -2230,6 +2652,18 @@ where
     }
 }
 
+impl UpdateSlice for Recoverable
+where
+    I: UpdateSlice,
+    E: crate::lib::std::fmt::Debug,
+{
+    #[inline(always)]
+    fn update_slice(mut self, inner: Self::Slice) -> Self {
+        self.input = I::update_slice(self.input, inner); // only `input` is replaced
+        self // `errors` and `is_recoverable` are carried over unchanged
+    }
+}
+
 impl UpdateSlice for Stateful
 where
     I: UpdateSlice,