From 06f2a937e285345a15a846fe2114d8f549c9ea84 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20Bodn=C3=A1r?=
Date: Sat, 27 Jan 2024 08:53:24 +0100
Subject: [PATCH] feat!: add parent property to tokens (#71)
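At the same time, we also make the `children`
attribute a _property_. This makes it easier for us
to set the `parent` of every child token, without
having to change much of the existing code.
BREAKING CHANGE: reading `children` on a leaf token now
returns `None` instead of raising `AttributeError`.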
---
dev-guide.md | 4 ++++
mistletoe/ast_renderer.py | 2 +-
mistletoe/html_renderer.py | 2 +-
mistletoe/span_token.py | 2 +-
mistletoe/token.py | 29 ++++++++++++++++++++++++++++-
mistletoe/utils.py | 4 ++--
test/test_block_token.py | 12 ++++++++++++
test/test_span_token.py | 10 ++++++++--
8 files changed, 57 insertions(+), 8 deletions(-)
diff --git a/dev-guide.md b/dev-guide.md
index 881f1e00..f81596bf 100644
--- a/dev-guide.md
+++ b/dev-guide.md
@@ -22,6 +22,10 @@ In this category you will find tokens like `RawText`, `Link`, and `Emphasis`.
Block tokens may have block tokens, span tokens, or no tokens at all as children
in the AST; this depends on the type of token. Span tokens may *only* have span
tokens as children.
+Every token has `children` and `parent` properties that can be used
+for traversing the node hierarchy.
+
+### Viewing the AST
In order to see what exactly gets parsed, one can simply use the `AstRenderer`
on a given markdown input, for example:
diff --git a/mistletoe/ast_renderer.py b/mistletoe/ast_renderer.py
index 65d88e70..5ffe2788 100644
--- a/mistletoe/ast_renderer.py
+++ b/mistletoe/ast_renderer.py
@@ -42,7 +42,7 @@ def get_ast(token):
node[attrname] = getattr(token, attrname)
if 'header' in vars(token):
node['header'] = get_ast(getattr(token, 'header'))
- if 'children' in vars(token):
+ if token.children is not None:
node['children'] = [get_ast(child) for child in token.children]
return node
diff --git a/mistletoe/html_renderer.py b/mistletoe/html_renderer.py
index 340e0d4a..cc24d2dc 100644
--- a/mistletoe/html_renderer.py
+++ b/mistletoe/html_renderer.py
@@ -49,7 +49,7 @@ def __exit__(self, *args):
super().__exit__(*args)
def render_to_plain(self, token) -> str:
- if hasattr(token, 'children'):
+ if token.children is not None:
inner = [self.render_to_plain(child) for child in token.children]
return ''.join(inner)
return html.escape(token.content)
diff --git a/mistletoe/span_token.py b/mistletoe/span_token.py
index 6671ff86..86f0c887 100644
--- a/mistletoe/span_token.py
+++ b/mistletoe/span_token.py
@@ -71,7 +71,7 @@ def __init__(self, match):
self.content = match.group(self.parse_group)
def __contains__(self, text):
- if hasattr(self, 'children'):
+ if self.children is not None:
return any(text in child for child in self.children)
return text in self.content
diff --git a/mistletoe/token.py b/mistletoe/token.py
index c4ceb12b..99f63c8b 100644
--- a/mistletoe/token.py
+++ b/mistletoe/token.py
@@ -1,3 +1,5 @@
"""
Base token class.
"""
+
+from typing import Iterable, Optional
@@ -54,7 +56,7 @@ def __repr__(self):
self.__class__.__name__
)
- if "children" in vars(self):
+ if self.children is not None:
count = len(self.children)
if count == 1:
output += " with 1 child"
@@ -69,3 +71,28 @@ def __repr__(self):
output += " {}={}".format(attrname, _short_repr(attrvalue))
output += " at {:#x}>".format(id(self))
return output
+
+ @property
+ def parent(self) -> Optional['Token']:
+ """Returns the parent token, if there is any."""
+ return getattr(self, '_parent', None)  # `_parent` is absent until a `children` setter assigns it
+
+ @property
+ def children(self) -> Optional[Iterable['Token']]:
+ """
+ Returns the child (nested) tokens.
+ Returns `None` if the token is a leaf token.
+ """
+ return getattr(self, '_children', None)  # `_children` is absent when `children` was never assigned
+
+ @children.setter
+ def children(self, value: Iterable['Token']):
+ """
+ Sets new child (nested) tokens.
+ Passed tokens are iterated and their ``parent`` property is set to
+ this token.
+ """
+ self._children = value
+ if value:  # nothing to re-parent for `None` or an empty collection
+ for child in value:  # NOTE(review): this pass consumes `value` if it is a one-shot iterator
+ child._parent = self
diff --git a/mistletoe/utils.py b/mistletoe/utils.py
index fae0863c..f7dd0bef 100644
--- a/mistletoe/utils.py
+++ b/mistletoe/utils.py
@@ -20,7 +20,7 @@ def traverse(source, klass=None, depth=None, include_source=False):
current_depth = 0
if include_source and (klass is None or isinstance(source, klass)):
yield TraverseResult(source, None, current_depth)
- next_children = [(source, c) for c in getattr(source, 'children', [])]
+ next_children = [(source, c) for c in source.children or []]
while next_children and (depth is None or current_depth < depth):
current_depth += 1
new_children = []
@@ -28,6 +28,6 @@ def traverse(source, klass=None, depth=None, include_source=False):
if klass is None or isinstance(child, klass):
yield TraverseResult(child, parent, current_depth)
new_children.extend(
- [(child, c) for c in getattr(child, 'children', [])]
+ [(child, c) for c in child.children or []]
)
next_children = new_children
diff --git a/test/test_block_token.py b/test/test_block_token.py
index f19f246e..d88360d8 100644
--- a/test/test_block_token.py
+++ b/test/test_block_token.py
@@ -616,6 +616,18 @@ def test_contains(self):
self.assertFalse('foo' in token)
+class TestParent(unittest.TestCase):
+ def test_parent(self):
+ lines = ['# heading\n', '\n', 'paragraph\n']
+ token = block_token.Document(lines)
+ self.assertEqual(len(token.children), 2)
+ self.assertIsNone(token.parent)  # the document built here was never assigned as anyone's child
+ for child in token.children:
+ self.assertIs(child.parent, token)  # parent must be the very same object, not just an equal one
+ for grandchild in child.children:
+ self.assertIs(grandchild.parent, child)
+
+
class TestHtmlBlock(unittest.TestCase):
def setUp(self):
block_token.add_token(block_token.HtmlBlock)
diff --git a/test/test_span_token.py b/test/test_span_token.py
index efb12aa7..700bc189 100644
--- a/test/test_span_token.py
+++ b/test/test_span_token.py
@@ -154,8 +154,7 @@ def test_attribute(self):
def test_no_children(self):
token = span_token.RawText('some text')
- with self.assertRaises(AttributeError):
- token.children
+ self.assertIsNone(token.children)
def test_valid_html_entities(self):
tokens = span_token.tokenize_inner(' 合')
@@ -192,6 +191,13 @@ def test_contains(self):
self.assertFalse('foo' in token)
+class TestParent(unittest.TestCase):
+ def test_parent(self):
+ token, = span_token.tokenize_inner('**some text**')
+ self.assertIsInstance(token.children[0], span_token.RawText)
+ self.assertIs(token.children[0].parent, token)  # parent must be the very same object, not just an equal one
+
+
class TestHtmlSpan(unittest.TestCase):
def setUp(self):
span_token.add_token(span_token.HtmlSpan)