From 8d4943488b0ec02080d059368409c76f512dedd5 Mon Sep 17 00:00:00 2001 From: Sebastian Walter Date: Thu, 7 Mar 2024 20:27:03 +0100 Subject: [PATCH] unify exact and regular lr1 constraint in a single python interface --- src/grammar.rs | 120 ++++++++++++++++++++++++++++------ text-utils-grammar/src/lr1.rs | 72 +++++++++----------- 2 files changed, 132 insertions(+), 60 deletions(-) diff --git a/src/grammar.rs b/src/grammar.rs index 998acc9..82224d2 100644 --- a/src/grammar.rs +++ b/src/grammar.rs @@ -5,8 +5,8 @@ use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::types::{PyDict, PyList}; use text_utils_grammar::{ - Constraint, ExactLR1GrammarConstraint, LR1GrammarParser, LR1NextState, LR1Parse, LR1State, - RegularExpressionConstraint, RegularExpressionState, + Constraint, ExactLR1GrammarConstraint, LR1GrammarConstraint, LR1GrammarParser, LR1NextState, + LR1Parse, LR1State, RegularExpressionConstraint, RegularExpressionState, }; #[pyclass] @@ -113,21 +113,90 @@ impl RegexConstraint { } } +enum LR1Type { + Exact(ExactLR1GrammarConstraint), + Regular(LR1GrammarConstraint), +} + +#[derive(Clone)] +enum LR1NextStates { + Exact(Vec), + Regular(Vec), +} + #[pyclass] -struct ExactLR1Constraint { - inner: Arc, +struct LR1Constraint { + inner: Arc, state: LR1State, indices: Vec, is_match: bool, - next_states: Vec, + next_states: LR1NextStates, +} + +impl LR1Type { + fn get_state(&self, prefix: &[u8]) -> Option { + match self { + LR1Type::Exact(inner) => inner.get_state(prefix), + LR1Type::Regular(inner) => inner.get_state(prefix), + } + } + + fn get_start_state(&self) -> LR1State { + match self { + LR1Type::Exact(inner) => inner.get_start_state(), + LR1Type::Regular(inner) => inner.get_start_state(), + } + } + + fn get_valid_continuations_with_state(&self, state: &LR1State) -> (Vec, LR1NextStates) { + match self { + LR1Type::Exact(inner) => { + let (indices, next_states) = inner.get_valid_continuations_with_state(state); + (indices, LR1NextStates::Exact(next_states)) + } + LR1Type::Regular(inner) => { + let (indices, next_states) = inner.get_valid_continuations_with_state(state); + (indices, LR1NextStates::Regular(next_states)) + } + } + } + + fn is_match_state(&self, state: &LR1State) -> bool { + match self { + LR1Type::Exact(inner) => inner.is_match_state(state), + LR1Type::Regular(inner) => inner.is_match_state(state), + } + } + + fn only_skippable_matching(&self, state: &LR1State) -> bool { + match self { + LR1Type::Exact(inner) => inner.only_skippable_matching(state), + LR1Type::Regular(inner) => inner.only_skippable_matching(state), + } + } } #[pymethods] -impl ExactLR1Constraint { +impl LR1Constraint { #[new] - fn new(grammar: &str, lexer: &str, continuations: Vec>) -> anyhow::Result { - let inner = ExactLR1GrammarConstraint::new(grammar, lexer, continuations) - .map_err(|e| anyhow!("failed to create LR(1) grammar constraint: {}", e))?; + #[pyo3(signature = (grammar, lexer, continuations, exact=false))] + fn new( + grammar: &str, + lexer: &str, + continuations: Vec>, + exact: bool, + ) -> anyhow::Result { + let inner = if exact { + LR1Type::Exact( + ExactLR1GrammarConstraint::new(grammar, lexer, continuations) + .map_err(|e| anyhow!("failed to create LR(1) grammar constraint: {}", e))?, + ) + } else { + LR1Type::Regular( + LR1GrammarConstraint::new(grammar, lexer, continuations) + .map_err(|e| anyhow!("failed to create LR(1) grammar constraint: {}", e))?, + ) + }; let state = inner.get_start_state(); let (indices, next_states) = inner.get_valid_continuations_with_state(&state); let is_match = inner.is_match_state(&state); @@ -141,20 +210,24 @@ impl ExactLR1Constraint { } #[staticmethod] + #[pyo3(signature = (grammar_path, lexer_path, continuations, exact=false))] fn from_files( grammar_path: &str, lexer_path: &str, continuations: Vec>, + exact: bool, ) -> anyhow::Result { - let inner = ExactLR1GrammarConstraint::from_files(grammar_path, lexer_path, continuations) - .map_err(|e| { - anyhow!( - "failed to create LR(1) grammar constraint from files {} and {}: {}", - grammar_path, - lexer_path, - e - ) - })?; + let inner = if exact { + LR1Type::Exact( + ExactLR1GrammarConstraint::from_files(grammar_path, lexer_path, continuations) + .map_err(|e| anyhow!("failed to create LR(1) grammar constraint: {}", e))?, + ) + } else { + LR1Type::Regular( + LR1GrammarConstraint::from_files(grammar_path, lexer_path, continuations) + .map_err(|e| anyhow!("failed to create LR(1) grammar constraint: {}", e))?, + ) + }; let state = inner.get_start_state(); let (indices, next_states) = inner.get_valid_continuations_with_state(&state); let is_match = inner.is_match_state(&state); @@ -208,7 +281,14 @@ impl ExactLR1Constraint { self.indices ) })?; - self.state.next(std::mem::take(&mut self.next_states[idx])); + match &mut self.next_states { + LR1NextStates::Exact(states) => { + self.state.next(std::mem::take(&mut states[idx])); + } + LR1NextStates::Regular(states) => { + self.state = std::mem::take(&mut states[idx]); + } + } let (indices, states) = self.inner.get_valid_continuations_with_state(&self.state); self.indices = indices; self.next_states = states; @@ -350,7 +430,7 @@ fn parse_into_py( pub(super) fn add_submodule(py: Python, parent_module: &PyModule) -> PyResult<()> { let m = PyModule::new(py, "grammar")?; m.add_class::()?; - m.add_class::()?; + m.add_class::()?; m.add_class::()?; parent_module.add_submodule(m)?; diff --git a/text-utils-grammar/src/lr1.rs b/text-utils-grammar/src/lr1.rs index d5c53bc..6ac82c7 100644 --- a/text-utils-grammar/src/lr1.rs +++ b/text-utils-grammar/src/lr1.rs @@ -659,6 +659,31 @@ fn is_accept_state(grammar: &YaccGrammar, table: &StateTable, stack: &[StId ) } +fn is_match_state( + grammar: &YaccGrammar, + table: &StateTable, + pdfas: &[(PrefixDFA, Option>)], + state: &LR1State, +) -> bool { + is_accept_state(grammar, table, &state.stack) + || state.matching.iter().any(|&(pidx, pdfa_state)| { + let (pdfa, Some(token)) = &pdfas[pidx] else { + return false; + }; + if !pdfa.is_match_state(pdfa_state) { + return false; + } + let LR1Action::ShiftReduce(keep, stidx) = + shift_reduce(grammar, table, &state.stack, *token) + else { + return false; + }; + let mut stack = state.stack[..keep].to_vec(); + stack.push(stidx); + is_accept_state(grammar, table, &stack) + }) +} + impl ExactLR1GrammarConstraint { pub fn new( grammar: &str, @@ -699,7 +724,7 @@ impl ExactLR1GrammarConstraint { } } -#[derive(Clone)] +#[derive(Clone, Default)] pub struct LR1State { stack: Vec>, matching: Matching, @@ -756,27 +781,7 @@ impl Constraint for ExactLR1GrammarConstraint { } fn is_match_state(&self, state: &Self::State) -> bool { - is_accept_state(&self.grammar, &self.table, &state.stack) - || state.matching.iter().any(|&(pidx, pdfa_state)| { - let (pdfa, token) = &self.pdfas[pidx]; - if !pdfa.is_match_state(pdfa_state) { - return false; - } - let Some(token) = token else { - // a skippable token would not change anything here, - // as the check for accept state would already have - // returned true - return false; - }; - let LR1Action::ShiftReduce(keep, stidx) = - shift_reduce(&self.grammar, &self.table, &state.stack, *token) - else { - return false; - }; - let mut stack = state.stack[..keep].to_vec(); - stack.push(stidx); - is_accept_state(&self.grammar, &self.table, &stack) - }) + is_match_state(&self.grammar, &self.table, &self.pdfas, state) } fn get_valid_continuations_with_state( @@ -954,6 +959,10 @@ impl LR1GrammarConstraint { let tokens = read_to_string(file)?; Self::new(&grammar, &tokens, continuations) } + + pub fn only_skippable_matching(&self, state: &LR1State) -> bool { + only_skippable_matching(&state.matching, &self.pdfas) + } } impl Constraint for LR1GrammarConstraint { @@ -986,24 +995,7 @@ impl Constraint for LR1GrammarConstraint { } fn is_match_state(&self, state: &Self::State) -> bool { - is_accept_state(&self.grammar, &self.table, &state.stack) - || state.matching.iter().any(|&(pidx, pdfa_state)| { - let (pdfa, token) = &self.pdfas[pidx]; - if !pdfa.is_match_state(pdfa_state) { - return false; - } - let Some(token) = token else { - return false; - }; - let LR1Action::ShiftReduce(keep, stidx) = - shift_reduce(&self.grammar, &self.table, &state.stack, *token) - else { - return false; - }; - let mut stack = state.stack[..keep].to_vec(); - stack.push(stidx); - is_accept_state(&self.grammar, &self.table, &stack) - }) + is_match_state(&self.grammar, &self.table, &self.pdfas, state) } fn get_valid_continuations_with_state(