From c88a623574a886df6855b74817a10cff1e536c52 Mon Sep 17 00:00:00 2001 From: Vilhjalmur Thorsteinsson Date: Thu, 19 Nov 2020 00:37:09 +0000 Subject: [PATCH] Type annotations and code cleanup in baseparser/fastparser/grammar.py --- src/reynir/baseparser.py | 20 +++--- src/reynir/fastparser.py | 10 +-- src/reynir/grammar.py | 147 ++++++++++++++++++++++----------------- 3 files changed, 100 insertions(+), 77 deletions(-) diff --git a/src/reynir/baseparser.py b/src/reynir/baseparser.py index bedcaddd..4a5f7f26 100644 --- a/src/reynir/baseparser.py +++ b/src/reynir/baseparser.py @@ -32,9 +32,9 @@ """ -from typing import Union, Dict, List, Optional +from typing import Union, Dict, List, Iterator, Optional -from .grammar import Grammar, Terminal, Nonterminal +from .grammar import Grammar, GrammarItem, Terminal, Nonterminal, Production class _PackedProduction: @@ -43,7 +43,7 @@ class _PackedProduction: where the component terminals and nonterminals have been packed into a list of integer indices """ - def __init__(self, priority, production): + def __init__(self, priority: int, production: Production) -> None: # Store the relative priority of this production within its nonterminal self._priority = priority # Keep a reference to the original production @@ -54,20 +54,20 @@ def __init__(self, priority, production): self._len = len(self._ix_list) @property - def production(self): + def production(self) -> Production: return self._production @property - def priority(self): + def priority(self) -> int: return self._priority - def __getitem__(self, index): + def __getitem__(self, index: int) -> int: return self._ix_list[index] if 0 <= index < self._len else 0 - def __len__(self): + def __len__(self) -> int: return self._len - def __iter__(self): + def __iter__(self) -> Iterator[int]: return iter(self._ix_list) @@ -79,7 +79,7 @@ class Base_Parser: """ def __init__(self) -> None: - self._root = None + self._root: Optional[int] = None self._nt_dict: Dict[int, Optional[List[_PackedProduction]]] = {} self._nonterminals: Dict[int, Nonterminal] = {} self._terminals: Dict[int, Terminal] = {} @@ -112,7 +112,7 @@ def for_grammar(cls, g: Grammar) -> "Base_Parser": p.init_from_grammar(g) return p - def _lookup(self, ix: int) -> Union[Terminal, Nonterminal]: + def _lookup(self, ix: int) -> GrammarItem: """ Convert a production item from an index to an object reference """ # Terminals have positive indices # Nonterminals have negative indices diff --git a/src/reynir/fastparser.py b/src/reynir/fastparser.py index 3e70125a..d8f1da70 100644 --- a/src/reynir/fastparser.py +++ b/src/reynir/fastparser.py @@ -249,11 +249,11 @@ def __init__(self, start: int, end: int) -> None: # Priority of highest-priority child family self._highest_prio = 0 # The nonterminal corresponding to this node, if not a leaf node - self._nonterminal = None + self._nonterminal: Optional[Nonterminal] = None # The terminal corresponding to this node, if it is a leaf node - self._terminal = None + self._terminal: Optional[Terminal] = None # The token matching this terminal, if this is a leaf node - self._token = None + self._token: Optional[Token] = None # If completed is True, this node represents a completed nonterminal. # Otherwise, it is an internal node representing a position within # a production of a nonterminal. @@ -282,13 +282,13 @@ def from_c_node( if lb.iNt >= 0: # Token node: find the corresponding terminal tix = parent.pList[index] - node._terminal = job.grammar.lookup(tix) + node._terminal = job.grammar.lookup_terminal(tix) node._token = job.tokens[lb.iNt] return node # Nonterminal node nt = lb.iNt - node._nonterminal = job.grammar.lookup(nt) + node._nonterminal = job.grammar.lookup_nonterminal(nt) node._completed = lb.pProd == ffi.NULL # Cache nonterminal nodes job.c_dict[c_node] = node diff --git a/src/reynir/grammar.py b/src/reynir/grammar.py index 20f8fba0..02180bb6 100644 --- a/src/reynir/grammar.py +++ b/src/reynir/grammar.py @@ -62,7 +62,18 @@ """ -from typing import List, Sequence, Dict, Set, Tuple, Iterator, Optional, Union, Any +from typing import ( + List, + Sequence, + Dict, + Set, + Tuple, + Iterable, + Iterator, + Optional, + Union, + Any, +) import os import struct @@ -121,11 +132,12 @@ def __init__(self) -> None: @property def literal_text(self) -> str: + """ The literal text of a terminal, if it is a literal terminal """ return "" @property def index(self) -> int: - """ Return the (negative) sequence number of this nonterminal """ + """ Return the sequence number of this grammar item """ return self._index def set_index(self, ix: int) -> None: @@ -242,9 +254,9 @@ def __init__(self, name: str) -> None: super().__init__() self._name = name self._index = Terminal._INDEX + Terminal._INDEX += 1 # The hash is used quite often so it is worth caching self._hash = id(self).__hash__() - Terminal._INDEX += 1 def __hash__(self) -> int: return self._hash @@ -320,7 +332,7 @@ class Token: """ A single input token as seen by the parser """ - def __init__(self, kind: str, val: str, lit: Optional[str] = None): + def __init__(self, kind: str, val: str, lit: Optional[str] = None) -> None: """ A basic token has a kind, a canonical value and an optional literal value, all strings """ self._kind = kind @@ -388,7 +400,7 @@ def __init__( self._tuple: Optional[Tuple[int, ...]] = None @classmethod - def reset(cls): + def reset(cls) -> None: """ Reset the production index sequence to zero """ cls._INDEX = 0 @@ -400,63 +412,63 @@ def append(self, t: GrammarItem) -> None: self._tuple = None @property - def index(self): + def index(self) -> int: return self._index @property - def length(self): + def length(self) -> int: """ Return the length of this production """ return self._len @property - def is_empty(self): + def is_empty(self) -> bool: """ Return True if this is an empty (epsilon) production """ return self._len == 0 @property - def fname(self): + def fname(self) -> Optional[str]: return self._fname @property - def line(self): + def line(self) -> int: return self._line @property - def priority(self): + def priority(self) -> int: return self._priority @property - def prod(self): + def prod(self) -> Tuple[int, ...]: """ Return this production in tuple form """ if self._tuple is None: # Nonterminals have negative indices and terminals have positive ones self._tuple = tuple(t.index for t in self._rhs) if self._rhs else tuple() return self._tuple - def nonterminal_at(self, dot): + def nonterminal_at(self, dot: int) -> bool: """ Return True if prod[dot] is a nonterminal or completed """ return dot >= self._len or isinstance(self._rhs[dot], Nonterminal) - def __getitem__(self, index): + def __getitem__(self, index: int) -> GrammarItem: """ Return the Terminal or Nonterminal at the given index position """ return self._rhs[index] - def __setitem__(self, index, val): + def __setitem__(self, index: int, item: GrammarItem): """ Set the Terminal or Nonterminal at the given index position """ - self._rhs[index] = val + self._rhs[index] = item - def __len__(self): + def __len__(self) -> int: """ Return the length of this production """ return self._len - def __iter__(self): + def __iter__(self) -> Iterator[GrammarItem]: return iter(self._rhs) - def __repr__(self): + def __repr__(self) -> str: """ Return a representation of this production """ return "" - def __str__(self): + def __str__(self) -> str: """ Return a representation of this production """ return " ".join([str(t) for t in self._rhs]) if self._rhs else "0" @@ -514,9 +526,8 @@ def __init__(self) -> None: self._nt_scores: Dict[Nonterminal, int] = {} self._root: Optional[Nonterminal] = None - self._secondary_roots: List[ - Nonterminal - ] = [] # Additional, secondary roots, if any + # Additional, secondary roots, if any + self._secondary_roots: List[Nonterminal] = [] # Information about the grammar file self._file_name: Optional[str] = None @@ -527,120 +538,128 @@ def __init__(self) -> None: self._conditions: Set[str] = set() @property - def nt_dict(self): + def nt_dict(self) -> Dict[Nonterminal, List[ProductionTuple]]: """ Return the raw grammar dictionary, Nonterminal -> [ Productions ] """ return self._nt_dict - def nt_score(self, nt): + def nt_score(self, nt: Nonterminal) -> int: """ Return the score adjustment for the given nonterminal """ return self._nt_scores.get(nt, 0) @property - def root(self): + def root(self) -> Optional[Nonterminal]: """ Return the root nonterminal for this grammar """ return self._root @property - def terminals(self): + def terminals(self) -> Dict[str, Terminal]: """ Return a dictionary of terminals in the grammar """ return self._terminals @property - def nonterminals(self): + def nonterminals(self) -> Dict[str, Nonterminal]: """ Return a dictionary of nonterminals in the grammar """ return self._nonterminals @property - def nonterminals_by_ix(self): + def nonterminals_by_ix(self) -> Dict[int, Nonterminal]: """ Return a dictionary of nonterminals in the grammar, indexed by integer < 0 """ return self._nonterminals_by_ix @property - def terminals_by_ix(self): + def terminals_by_ix(self) -> Dict[int, Terminal]: """ Return a dictionary of terminals in the grammar, indexed by integer > 0 """ return self._terminals_by_ix @property - def productions_by_ix(self): + def productions_by_ix(self) -> Dict[int, Production]: """ Return a dictionary of productions in the grammar, indexed by integer >= 0 """ return self._productions_by_ix - def lookup(self, index): + def lookup(self, index: int) -> Optional[GrammarItem]: """ Look up a nonterminal or terminal by integer index """ if index < 0: - return self._nonterminals_by_ix.get(index, None) + return self._nonterminals_by_ix.get(index) if index > 0: - return self._terminals_by_ix.get(index, None) + return self._terminals_by_ix.get(index) # index == 0 return None + def lookup_nonterminal(self, index: int) -> Optional[Nonterminal]: + """ Look up a nonterminal by (negative) integer index """ + return None if index >= 0 else self._nonterminals_by_ix.get(index) + + def lookup_terminal(self, index: int) -> Optional[Terminal]: + """ Look up a terminal by (positive) integer index """ + return None if index <= 0 else self._terminals_by_ix.get(index) + @property - def num_nonterminals(self): + def num_nonterminals(self) -> int: """ Return the number of nonterminals in the grammar """ return len(self._nonterminals) @property - def num_terminals(self): + def num_terminals(self) -> int: """ Return the number of terminals in the grammar """ return len(self._terminals) @property - def num_productions(self): + def num_productions(self) -> int: """ Return the total number of productions in the grammar, were each right hand side option is counted as one """ return sum(len(pp) for pp in self._nt_dict.values()) @property - def file_name(self): + def file_name(self) -> Optional[str]: """ Return the name of the grammar file, or None """ return self._file_name @property - def file_time(self): + def file_time(self) -> Optional[datetime]: """ Return the timestamp of the grammar file, or None """ return self._file_time - def set_conditions(self, cond_set): + def set_conditions(self, cond_set: Set[str]) -> None: """ Set the parsing conditions for this grammar, checkable with $if()...$endif() """ self._conditions = cond_set - def __getitem__(self, nt): + def __getitem__(self, nt: Nonterminal) -> List[ProductionTuple]: """ Look up a nonterminal, yielding a list of (priority, production) tuples """ return self._nt_dict[nt] - def __str__(self): + def __str__(self) -> str: """ Return a string representation of this grammar """ - def to_str(plist): - return " | ".join([str(p) for p in plist]) + def to_str(plist: Iterable[ProductionTuple]) -> str: + return " | ".join([str(p) for _, p in plist]) return "\n".join( - [str(nt) + " → " + to_str(pp[1]) + "\n" for nt, pp in self._nt_dict.items()] + [str(nt) + " → " + to_str(ptl) + "\n" for nt, ptl in self._nt_dict.items()] ) @staticmethod - def _make_terminal(name): + def _make_terminal(name: str) -> Terminal: """ Create a new Terminal instance within the grammar """ # Override this to create custom terminals or add optimizations return Terminal(name) @staticmethod - def _make_literal_terminal(name): + def _make_literal_terminal(name: str) -> LiteralTerminal: """ Create a new LiteralTerminal instance within the grammar """ # Override this to create custom terminals or add optimizations return LiteralTerminal(name) @staticmethod - def _make_nonterminal(name, fname, line): + def _make_nonterminal(name: str, fname: str, line: int) -> Nonterminal: """ Create a new Nonterminal instance within the grammar """ # Override this to create custom nonterminals or add optimizations return Nonterminal(name, fname, line) - def _write_binary(self, fname): + def _write_binary(self, fname: str) -> None: """ Write grammar to binary file. Called after reading a grammar text file that is newer than the corresponding binary file. """ with open(fname, "wb") as f: @@ -652,12 +671,13 @@ def _write_binary(self, fname): # Number of terminals and nonterminals in grammar f.write(struct.pack(" None: """ Read grammar from a text file. Set verbose=True to get diagnostic messages about unused nonterminals and nonterminals that are unreachable from the root. @@ -683,7 +705,7 @@ def read(self, fname, verbose=False, binary_fname=None): try: with open(fname, "r", encoding="utf-8") as inp: # Read grammar file line-by-line - return self.read_from_generator(fname, inp, verbose, binary_fname) + self.read_from_generator(fname, inp, verbose, binary_fname) except (IOError, OSError): raise GrammarError("Unable to open or read grammar file", fname, 0) @@ -691,9 +713,9 @@ def read_from_generator( self, fname: str, line_generator: Iterator[str], - verbose=False, - binary_fname=None, - force_new_binary=False, + verbose: bool = False, + binary_fname: Optional[str] = None, + force_new_binary: bool = False, ) -> None: """ Read grammar from a generator of lines. Set verbose=True to get diagnostic messages about unused nonterminals and nonterminals that are @@ -1352,10 +1374,10 @@ def set_score(nt, score): # Go through all productions and replace the shortcuts with their targets for nt, plist in grammar.items(): for _, p in plist: - for ix, s in enumerate(p): - if isinstance(s, Nonterminal) and s in shortcuts: + for ix, item in enumerate(p): + if isinstance(item, Nonterminal) and item in shortcuts: # Replace the nonterminal in the production - target = shortcuts[s] + target = shortcuts[item] # if verbose: # # Print informational message in verbose mode # print("Production of {2}: Replaced {0} with {1}" @@ -1366,7 +1388,7 @@ def set_score(nt, score): # are reachable from the root unreachable = {nt for nt in nonterminals.values()} - def _remove(nt): + def _remove(nt: Nonterminal) -> None: """ Recursively remove all nonterminals that are reachable from nt """ unreachable.remove(nt) for _, p in grammar[nt]: @@ -1375,9 +1397,10 @@ def _remove(nt): _remove(s) # Remove the main root and any secondary roots - _remove(self._root) - for r in self._secondary_roots: - _remove(r) + if self._root is not None: + _remove(self._root) + for r in self._secondary_roots: + _remove(r) if unreachable: if verbose: