From 06f2a937e285345a15a846fe2114d8f549c9ea84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Bodn=C3=A1r?= Date: Sat, 27 Jan 2024 08:53:24 +0100 Subject: [PATCH] feat!: add parent property to tokens (#71) At the same time, we also make the `children` attribute a _property_. This makes it easier for us to set the `parent` of every child token, without having to change much of the existing code. --- dev-guide.md | 4 ++++ mistletoe/ast_renderer.py | 2 +- mistletoe/html_renderer.py | 2 +- mistletoe/span_token.py | 2 +- mistletoe/token.py | 29 ++++++++++++++++++++++++++++- mistletoe/utils.py | 4 ++-- test/test_block_token.py | 12 ++++++++++++ test/test_span_token.py | 10 ++++++++-- 8 files changed, 57 insertions(+), 8 deletions(-) diff --git a/dev-guide.md b/dev-guide.md index 881f1e00..f81596bf 100644 --- a/dev-guide.md +++ b/dev-guide.md @@ -22,6 +22,10 @@ In this category you will find tokens like `RawText`, `Link`, and `Emphasis`. Block tokens may have block tokens, span tokens, or no tokens at all as children in the AST; this depends on the type of token. Span tokens may *only* have span tokens as children. +Every token has properties called `children` and `parent` that can be used +for traversing the node hierarchy. 
+ +### Viewing the AST In order to see what exactly gets parsed, one can simply use the `AstRenderer` on a given markdown input, for example: diff --git a/mistletoe/ast_renderer.py b/mistletoe/ast_renderer.py index 65d88e70..5ffe2788 100644 --- a/mistletoe/ast_renderer.py +++ b/mistletoe/ast_renderer.py @@ -42,7 +42,7 @@ def get_ast(token): node[attrname] = getattr(token, attrname) if 'header' in vars(token): node['header'] = get_ast(getattr(token, 'header')) - if 'children' in vars(token): + if token.children is not None: node['children'] = [get_ast(child) for child in token.children] return node diff --git a/mistletoe/html_renderer.py b/mistletoe/html_renderer.py index 340e0d4a..cc24d2dc 100644 --- a/mistletoe/html_renderer.py +++ b/mistletoe/html_renderer.py @@ -49,7 +49,7 @@ def __exit__(self, *args): super().__exit__(*args) def render_to_plain(self, token) -> str: - if hasattr(token, 'children'): + if token.children is not None: inner = [self.render_to_plain(child) for child in token.children] return ''.join(inner) return html.escape(token.content) diff --git a/mistletoe/span_token.py b/mistletoe/span_token.py index 6671ff86..86f0c887 100644 --- a/mistletoe/span_token.py +++ b/mistletoe/span_token.py @@ -71,7 +71,7 @@ def __init__(self, match): self.content = match.group(self.parse_group) def __contains__(self, text): - if hasattr(self, 'children'): + if self.children is not None: return any(text in child for child in self.children) return text in self.content diff --git a/mistletoe/token.py b/mistletoe/token.py index c4ceb12b..99f63c8b 100644 --- a/mistletoe/token.py +++ b/mistletoe/token.py @@ -1,3 +1,5 @@ +from typing import Iterable, Optional + """ Base token class. 
""" @@ -54,7 +56,7 @@ def __repr__(self): self.__class__.__name__ ) - if "children" in vars(self): + if self.children is not None: count = len(self.children) if count == 1: output += " with 1 child" @@ -69,3 +71,28 @@ def __repr__(self): output += " {}={}".format(attrname, _short_repr(attrvalue)) output += " at {:#x}>".format(id(self)) return output + + @property + def parent(self) -> Optional['Token']: + """Returns the parent token, if there is any.""" + return getattr(self, '_parent', None) + + @property + def children(self) -> Optional[Iterable['Token']]: + """ + Returns the child (nested) tokens. + Returns `None` if the token is a leaf token. + """ + return getattr(self, '_children', None) + + @children.setter + def children(self, value: Iterable['Token']): + """" + Sets new child (nested) tokens. + Passed tokens are iterated and their ``parent`` property is set to + this token. + """ + self._children = value + if value: + for child in value: + child._parent = self diff --git a/mistletoe/utils.py b/mistletoe/utils.py index fae0863c..f7dd0bef 100644 --- a/mistletoe/utils.py +++ b/mistletoe/utils.py @@ -20,7 +20,7 @@ def traverse(source, klass=None, depth=None, include_source=False): current_depth = 0 if include_source and (klass is None or isinstance(source, klass)): yield TraverseResult(source, None, current_depth) - next_children = [(source, c) for c in getattr(source, 'children', [])] + next_children = [(source, c) for c in source.children or []] while next_children and (depth is None or current_depth < depth): current_depth += 1 new_children = [] @@ -28,6 +28,6 @@ def traverse(source, klass=None, depth=None, include_source=False): if klass is None or isinstance(child, klass): yield TraverseResult(child, parent, current_depth) new_children.extend( - [(child, c) for c in getattr(child, 'children', [])] + [(child, c) for c in child.children or []] ) next_children = new_children diff --git a/test/test_block_token.py b/test/test_block_token.py index 
f19f246e..d88360d8 100644 --- a/test/test_block_token.py +++ b/test/test_block_token.py @@ -616,6 +616,18 @@ def test_contains(self): self.assertFalse('foo' in token) +class TestParent(unittest.TestCase): + def test_parent(self): + lines = ['# heading\n', '\n', 'paragraph\n'] + token = block_token.Document(lines) + self.assertEqual(len(token.children), 2) + self.assertIsNone(token.parent) + for child in token.children: + self.assertEqual(child.parent, token) + for grandchild in child.children: + self.assertEqual(grandchild.parent, child) + + class TestHtmlBlock(unittest.TestCase): def setUp(self): block_token.add_token(block_token.HtmlBlock) diff --git a/test/test_span_token.py b/test/test_span_token.py index efb12aa7..700bc189 100644 --- a/test/test_span_token.py +++ b/test/test_span_token.py @@ -154,8 +154,7 @@ def test_attribute(self): def test_no_children(self): token = span_token.RawText('some text') - with self.assertRaises(AttributeError): - token.children + self.assertIsNone(token.children) def test_valid_html_entities(self): tokens = span_token.tokenize_inner('  合') @@ -192,6 +191,13 @@ def test_contains(self): self.assertFalse('foo' in token) +class TestParent(unittest.TestCase): + def test_parent(self): + token, = span_token.tokenize_inner('**some text**') + self.assertIsInstance(token.children[0], span_token.RawText) + self.assertEqual(token.children[0].parent, token) + + class TestHtmlSpan(unittest.TestCase): def setUp(self): span_token.add_token(span_token.HtmlSpan)